diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -790,6 +790,7 @@
   virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                 llvm::Function *OutlinedFn,
                                 ArrayRef<llvm::Value *> CapturedVars,
+                                ArrayRef<llvm::Type *> CapturedVarsElemTypes,
                                 const Expr *IfCond, llvm::Value *NumThreads);

   /// Emits a critical region.
@@ -1724,6 +1725,7 @@
   void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                         llvm::Function *OutlinedFn,
                         ArrayRef<llvm::Value *> CapturedVars,
+                        ArrayRef<llvm::Type *> CapturedVarsElemTypes,
                         const Expr *IfCond, llvm::Value *NumThreads) override;

   /// Emits a critical region.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1253,7 +1253,7 @@
   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                     HasCancel, OutlinedHelperName);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
+  return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(*CS, D.getBeginLoc());
 }

 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
@@ -2027,11 +2027,11 @@
   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
 }

-void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                       llvm::Function *OutlinedFn,
-                                       ArrayRef<llvm::Value *> CapturedVars,
-                                       const Expr *IfCond,
-                                       llvm::Value *NumThreads) {
+void CGOpenMPRuntime::emitParallelCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+    ArrayRef<llvm::Value *> CapturedVars,
+    ArrayRef<llvm::Type *> CapturedVarsElemTypes, const Expr *IfCond,
+    llvm::Value *NumThreads) {
   if (!CGF.HaveInsertPoint())
     return;
   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
@@ -12403,12 +12403,11 @@
   llvm_unreachable("Not supported in SIMD-only mode");
 }

-void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
-                                           SourceLocation Loc,
-                                           llvm::Function *OutlinedFn,
-                                           ArrayRef<llvm::Value *> CapturedVars,
-                                           const Expr *IfCond,
-                                           llvm::Value *NumThreads) {
+void CGOpenMPSIMDRuntime::emitParallelCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+    ArrayRef<llvm::Value *> CapturedVars,
+    ArrayRef<llvm::Type *> CapturedVarsElemTypes, const Expr *IfCond,
+    llvm::Value *NumThreads) {
   llvm_unreachable("Not supported in SIMD-only mode");
 }

diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -250,6 +250,7 @@
   void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                         llvm::Function *OutlinedFn,
                         ArrayRef<llvm::Value *> CapturedVars,
+                        ArrayRef<llvm::Type *> CapturedVarsElemTypes,
                         const Expr *IfCond, llvm::Value *NumThreads) override;

   /// Emit an implicit/explicit barrier for OpenMP threads.
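For orientation, the net effect of the new aggregate path can be pictured at the source level roughly as in the sketch below. This is an illustrative assumption only: the struct layout, variable names, and the elided __kmpc_parallel_51 arguments are hypothetical and are not code emitted by this patch.

  // Hypothetical source-level view of the old vs. new argument passing.
  struct omp_outlined_arg_agg { int a; double *b; }; // one field per capture

  void emit_parallel_region(int a, double *b) {
    // Old scheme: one void* slot per captured variable plus an argument count.
    //   void *captured_vars_addrs[2] = {&a, b};
    //   __kmpc_parallel_51(..., fn, wrapper_fn, captured_vars_addrs, /*nargs=*/2);
    //
    // New scheme: all captures are packed into a single aggregate
    // ("omp.outlined.arg.agg.") and only its address is passed, mirroring the
    // __context parameter of the captured statement.
    omp_outlined_arg_agg agg{a, b};
    //   __kmpc_parallel_51(..., fn, wrapper_fn, &agg);
  }
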
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1164,18 +1164,17 @@
   emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
 }

-void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
-                                          SourceLocation Loc,
-                                          llvm::Function *OutlinedFn,
-                                          ArrayRef<llvm::Value *> CapturedVars,
-                                          const Expr *IfCond,
-                                          llvm::Value *NumThreads) {
+void CGOpenMPRuntimeGPU::emitParallelCall(
+    CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+    ArrayRef<llvm::Value *> CapturedVars,
+    ArrayRef<llvm::Type *> CapturedVarsElemTypes, const Expr *IfCond,
+    llvm::Value *NumThreads) {
   if (!CGF.HaveInsertPoint())
     return;

-  auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,
-                        NumThreads](CodeGenFunction &CGF,
-                                    PrePostActionTy &Action) {
+  auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars,
+                        CapturedVarsElemTypes, IfCond, NumThreads](
+                           CodeGenFunction &CGF, PrePostActionTy &Action) {
     CGBuilderTy &Bld = CGF.Builder;
     llvm::Value *NumThreadsVal = NumThreads;
     llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
@@ -1189,25 +1188,49 @@
     // TODO: Is that needed?
     CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF);

-    Address CapturedVarsAddrs = CGF.CreateDefaultAlignTempAlloca(
-        llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()),
-        "captured_vars_addrs");
-    // There's something to share.
-    if (!CapturedVars.empty()) {
-      // Prepare for parallel region. Indicate the outlined function.
-      ASTContext &Ctx = CGF.getContext();
-      unsigned Idx = 0;
-      for (llvm::Value *V : CapturedVars) {
-        Address Dst = Bld.CreateConstArrayGEP(CapturedVarsAddrs, Idx);
-        llvm::Value *PtrV;
-        if (V->getType()->isIntegerTy())
-          PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy);
-        else
-          PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy);
-        CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
-                              Ctx.getPointerType(Ctx.VoidPtrTy));
-        ++Idx;
-      }
+    assert(CapturedVars.size() == 1 &&
+           "Expected single aggregate argument to outlined function");
+
+    // Globalize the single aggregate argument, if needed, or use a local
+    // alloca, or emit null when there are no arguments.
+    llvm::Value *AggregateV = CapturedVars[0];
+    assert(AggregateV->getType()->isPointerTy() &&
+           "Expected pointer type for aggregate argument.");
+
+    assert(CapturedVarsElemTypes.size() == 1 &&
+           "Expected single element in array of types.");
+    llvm::Type *PtrElemTy = CapturedVarsElemTypes[0];
+    auto &DL = CGM.getDataLayout();
+    unsigned AllocSize = DL.getTypeAllocSize(PtrElemTy);
+
+    llvm::CallBase *GlobalPtr = nullptr;
+    llvm::Value *AggregatePtr = nullptr;
+
+    if (AllocSize) {
+      llvm::AllocaInst *LocalAlloc =
+          CGF.CreateTempAlloca(PtrElemTy, ".tmp.outlined.agg.arg");
+      llvm::Value *LocalPtr = Bld.CreatePointerCast(LocalAlloc, CGF.VoidPtrTy);
+      GlobalPtr =
+          CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                                  CGM.getModule(), OMPRTL___kmpc_alloc_shared),
+                              {llvm::ConstantInt::get(CGM.SizeTy, AllocSize)});
+      GlobalPtr->addRetAttr(llvm::Attribute::get(
+          CGM.getLLVMContext(), llvm::Attribute::Alignment,
+          CGM.getContext().getTargetInfo().getNewAlign() / 8));
+
+      llvm::Value *AllocArgs[] = {LocalPtr, GlobalPtr};
+      AggregatePtr = CGF.EmitRuntimeCall(
+          OMPBuilder.getOrCreateRuntimeFunction(
+              CGM.getModule(), OMPRTL___kmpc_alloc_aggregate_arg),
+          AllocArgs);
+
+      llvm::Value *CapturedVarVal = Bld.CreateAlignedLoad(
+          PtrElemTy, AggregateV, DL.getABITypeAlign(PtrElemTy));
+      llvm::Value *AggregatePtrCast = Bld.CreatePointerBitCastOrAddrSpaceCast(
+          AggregatePtr, PtrElemTy->getPointerTo());
+      Bld.CreateDefaultAlignedStore(CapturedVarVal, AggregatePtrCast);
+    } else {
+      AggregatePtr = llvm::Constant::getNullValue(OMPBuilder.VoidPtr);
     }

     llvm::Value *IfCondVal = nullptr;
@@ -1223,21 +1246,29 @@
       NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty),

     assert(IfCondVal && "Expected a value");
+    assert(AggregatePtr && "Expected non-null aggregate pointer value");
+
     // Create the parallel call.
     llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
-    llvm::Value *Args[] = {
-        RTLoc,
-        getThreadID(CGF, Loc),
-        IfCondVal,
-        NumThreadsVal,
-        llvm::ConstantInt::get(CGF.Int32Ty, -1),
-        FnPtr,
-        ID,
-        Bld.CreateBitOrPointerCast(CapturedVarsAddrs.getPointer(),
-                                   CGF.VoidPtrPtrTy),
-        llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
+    llvm::Value *Args[] = {RTLoc,
+                           getThreadID(CGF, Loc),
+                           IfCondVal,
+                           NumThreadsVal,
+                           llvm::ConstantInt::get(CGF.Int32Ty, -1),
+                           FnPtr,
+                           ID,
+                           AggregatePtr};
     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                             CGM.getModule(), OMPRTL___kmpc_parallel_51),
                         Args);
+
+    if (AllocSize) {
+      assert(GlobalPtr && "Expected non-null global pointer value");
+      // Pop global memory used for argument allocation.
+      CGF.EmitRuntimeCall(
+          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
+                                                OMPRTL___kmpc_free_shared),
+          {GlobalPtr, llvm::ConstantInt::get(CGM.SizeTy, AllocSize)});
+    }
   };

   RegionCodeGenTy RCG(ParallelGen);
@@ -3175,7 +3206,6 @@
                                           D.getBeginLoc(), D.getBeginLoc());

   const auto *RD = CS.getCapturedRecordDecl();
-  auto CurField = RD->field_begin();

   Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                       /*Name=*/".zero.addr");
@@ -3187,70 +3217,44 @@
   Args.emplace_back(ZeroAddr.getPointer());

   CGBuilderTy &Bld = CGF.Builder;
-  auto CI = CS.capture_begin();

   // Use global memory for data sharing.
   // Handle passing of global args to workers.
   Address GlobalArgs =
-      CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args");
+      CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrTy, "global_args");
   llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer();
   llvm::Value *DataSharingArgs[] = {GlobalArgsPtr};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                          CGM.getModule(), OMPRTL___kmpc_get_shared_variables),
-                      DataSharingArgs);
+  CGF.EmitRuntimeCall(
+      OMPBuilder.getOrCreateRuntimeFunction(
+          CGM.getModule(), OMPRTL___kmpc_get_shared_variables_aggregate),
+      DataSharingArgs);

   // Retrieve the shared variables from the list of references returned
   // by the runtime. Pass the variables to the outlined function.
-  Address SharedArgListAddress = Address::invalid();
-  if (CS.capture_size() > 0 ||
-      isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
-    SharedArgListAddress = CGF.EmitLoadOfPointer(
+  Address SharedArgAggregateAddress = Address::invalid();
+  if (CS.capture_size() > 0) {
+    SharedArgAggregateAddress = CGF.EmitLoadOfPointer(
         GlobalArgs, CGF.getContext()
-                        .getPointerType(CGF.getContext().VoidPtrTy)
+                        .getPointerType(CGF.getContext().VoidTy)
                         .castAs<PointerType>());
-  }
-  unsigned Idx = 0;
-  if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) {
-    Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
+    // Load the outlined arg aggregate struct.
+    ASTContext &CGFContext = CGF.getContext();
+    QualType RecordPointerTy =
+        CGFContext.getPointerType(CGFContext.getRecordType(RD));
     Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
-        Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy);
-    llvm::Value *LB = CGF.EmitLoadOfScalar(
-        TypedAddress,
-        /*Volatile=*/false,
-        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
-        cast<OMPLoopDirective>(D).getLowerBoundVariable()->getExprLoc());
-    Args.emplace_back(LB);
-    ++Idx;
-    Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx);
-    TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
-        Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy);
-    llvm::Value *UB = CGF.EmitLoadOfScalar(
-        TypedAddress,
-        /*Volatile=*/false,
-        CGF.getContext().getPointerType(CGF.getContext().getSizeType()),
-        cast<OMPLoopDirective>(D).getUpperBoundVariable()->getExprLoc());
-    Args.emplace_back(UB);
-    ++Idx;
-  }
-  if (CS.capture_size() > 0) {
+        SharedArgAggregateAddress,
+        CGF.ConvertTypeForMem(RecordPointerTy)->getPointerTo(),
+        CGF.ConvertTypeForMem(RecordPointerTy));
+    llvm::Value *Arg = TypedAddress.getPointer();
+    Args.emplace_back(Arg);
+  } else {
+    // If there are no captured arguments, use nullptr.
     ASTContext &CGFContext = CGF.getContext();
-    for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) {
-      QualType ElemTy = CurField->getType();
-      Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx);
-      Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast(
-          Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)),
-          CGF.ConvertTypeForMem(ElemTy));
-      llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress,
-                                              /*Volatile=*/false,
-                                              CGFContext.getPointerType(ElemTy),
-                                              CI->getLocation());
-      if (CI->capturesVariableByCopy() &&
-          !CI->getCapturedVar()->getType()->isAnyPointerType()) {
-        Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(),
-                              CI->getLocation());
-      }
-      Args.emplace_back(Arg);
-    }
+    QualType RecordPointerTy =
+        CGFContext.getPointerType(CGFContext.getRecordType(RD));
+    llvm::Value *Arg =
+        llvm::Constant::getNullValue(CGF.ConvertTypeForMem(RecordPointerTy));
+    Args.emplace_back(Arg);
   }

   emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -91,6 +91,19 @@
       auto *VD = C.getCapturedVar();
       assert(VD == VD->getCanonicalDecl() &&
              "Canonical decl must be captured.");
+      // Skip implicit captures for combined distribute loop bounds,
+      // those will be handled by later codegen.
+      if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+        const auto *LoopDirective = cast<OMPLoopDirective>(&S);
+        VarDecl *PrevLB = cast<VarDecl>(
+            cast<DeclRefExpr>(LoopDirective->getPrevLowerBoundVariable())
+                ->getDecl());
+        VarDecl *PrevUB = cast<VarDecl>(
+            cast<DeclRefExpr>(LoopDirective->getPrevUpperBoundVariable())
+                ->getDecl());
+        if (VD == PrevLB || VD == PrevUB)
+          continue;
+      }
       DeclRefExpr DRE(
           CGF.getContext(), const_cast<VarDecl *>(VD),
           isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
@@ -266,6 +279,19 @@
         continue;
       assert(VD == VD->getCanonicalDecl() &&
              "Canonical decl must be captured.");
+      // Skip implicit captures for combined distribute loop bounds,
+      // those will be handled by later codegen.
+      if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+        const auto *LoopDirective = cast<OMPLoopDirective>(&S);
+        VarDecl *PrevLB = cast<VarDecl>(
+            cast<DeclRefExpr>(LoopDirective->getPrevLowerBoundVariable())
+                ->getDecl());
+        VarDecl *PrevUB = cast<VarDecl>(
+            cast<DeclRefExpr>(LoopDirective->getPrevUpperBoundVariable())
+                ->getDecl());
+        if (VD == PrevLB || VD == PrevUB)
+          continue;
+      }
       DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                       isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
@@ -323,6 +349,34 @@
   return CGM.getSize(SizeInChars);
 }

+void CodeGenFunction::GenerateOpenMPCapturedVarsAggregate(
+    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars,
+    SmallVectorImpl<llvm::Type *> &CapturedVarsElemTypes) {
+  const RecordDecl *RD = S.getCapturedRecordDecl();
+  QualType RecordTy = getContext().getRecordType(RD);
+  // Create the aggregate argument struct for the outlined function.
+  LValue AggLV = MakeAddrLValue(
+      CreateMemTemp(RecordTy, "omp.outlined.arg.agg."), RecordTy);
+
+  // Initialize the aggregate with captured values.
+  auto CurField = RD->field_begin();
+  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
+                                                  E = S.capture_init_end();
+       I != E; ++I, ++CurField) {
+    LValue LV = EmitLValueForFieldInitialization(AggLV, *CurField);
+    // Initialize for VLA.
+    if (CurField->hasCapturedVLAType()) {
+      EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV);
+    } else
+      // Initialize for capturesThis, capturesVariableByCopy,
+      // capturesVariable
+      EmitInitializerForField(*CurField, LV, *I);
+  }
+
+  CapturedVars.push_back(AggLV.getPointer(*this));
+  CapturedVarsElemTypes.push_back(ConvertTypeForMem(AggLV.getType()));
+}
+
 void CodeGenFunction::GenerateOpenMPCapturedVars(
     const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
   const RecordDecl *RD = S.getCapturedRecordDecl();
@@ -422,6 +476,103 @@
 };
 } // namespace

+static llvm::Function *emitOutlinedFunctionPrologueAggregate(
+    CodeGenFunction &CGF, FunctionArgList &Args,
+    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
+        &LocalAddrs,
+    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
+        &VLASizes,
+    llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc,
+    StringRef FunctionName) {
+  const CapturedDecl *CD = CS.getCapturedDecl();
+  const RecordDecl *RD = CS.getCapturedRecordDecl();
+  assert(CD->hasBody() && "missing CapturedDecl body");
+
+  CXXThisValue = nullptr;
+  // Build the argument list.
+  CodeGenModule &CGM = CGF.CGM;
+  ASTContext &Ctx = CGM.getContext();
+  Args.append(CD->param_begin(), CD->param_end());
+
+  // Create the function declaration.
+  const CGFunctionInfo &FuncInfo =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
+  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
+
+  auto *F =
+      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
+                             FunctionName, &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
+  if (CD->isNothrow())
+    F->setDoesNotThrow();
+  F->setDoesNotRecurse();
+
+  // Generate the function.
+  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
+  Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
+  llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr);
+  LValue ContextLV = CGF.MakeNaturalAlignAddrLValue(
+      ContextV, CGM.getContext().getTagDeclType(RD));
+  const auto *I = CS.captures().begin();
+  for (const FieldDecl *FD : RD->fields()) {
+    LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD);
+    // Do not map arguments if we emit function with non-original types.
+    Address LocalAddr = FieldLV.getAddress(CGF);
+    // If we are capturing a pointer by copy we don't need to do anything, just
+    // use the value that we get from the arguments.
+    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
+      const VarDecl *CurVD = I->getCapturedVar();
+      LocalAddrs.insert({FD, {CurVD, LocalAddr}});
+      ++I;
+      continue;
+    }
+
+    LValue ArgLVal =
+        CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl);
+    if (FD->hasCapturedVLAType()) {
+      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
+      const VariableArrayType *VAT = FD->getCapturedVLAType();
+      VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg);
+    } else if (I->capturesVariable()) {
+      const VarDecl *Var = I->getCapturedVar();
+      QualType VarTy = Var->getType();
+      Address ArgAddr = ArgLVal.getAddress(CGF);
+      if (ArgLVal.getType()->isLValueReferenceType()) {
+        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
+      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
+        assert(ArgLVal.getType()->isPointerType());
+        ArgAddr = CGF.EmitLoadOfPointer(
+            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
+      }
+      LocalAddrs.insert(
+          {FD,
+           {Var, Address(ArgAddr.getPointer(), ArgAddr.getElementType(),
+                         Ctx.getDeclAlign(Var))}});
+    } else if (I->capturesVariableByCopy()) {
+      assert(!FD->getType()->isAnyPointerType() &&
+             "Not expecting a captured pointer.");
+      const VarDecl *Var = I->getCapturedVar();
+      Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD),
+                                           Var->getName());
+      LValue CopyLVal =
+          CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl);
+
+      RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation());
+      CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal);
+
+      LocalAddrs.insert({FD, {Var, CopyAddr}});
+    } else {
+      // If 'this' is captured, load it into CXXThisValue.
+      assert(I->capturesThis());
+      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
+      LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}});
+    }
+    ++I;
+  }
+
+  return F;
+}
+
 static llvm::Function *emitOutlinedFunctionPrologue(
     CodeGenFunction &CGF, FunctionArgList &Args,
     llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
@@ -606,6 +757,38 @@
   return F;
 }

+llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
+    const CapturedStmt &S, SourceLocation Loc) {
+  assert(
+      CapturedStmtInfo &&
+      "CapturedStmtInfo should be set when generating the captured function");
+  const CapturedDecl *CD = S.getCapturedDecl();
+  // Build the argument list.
+  FunctionArgList Args;
+  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
+  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
+  StringRef FunctionName = CapturedStmtInfo->getHelperName();
+  llvm::Function *F = emitOutlinedFunctionPrologueAggregate(
+      *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName);
+  CodeGenFunction::OMPPrivateScope LocalScope(*this);
+  for (const auto &LocalAddrPair : LocalAddrs) {
+    if (LocalAddrPair.second.first) {
+      auto retAddrPair = [&LocalAddrPair]() {
+        return LocalAddrPair.second.second;
+      };
+      LocalScope.addPrivate(LocalAddrPair.second.first, retAddrPair());
+    }
+  }
+  (void)LocalScope.Privatize();
+  for (const auto &VLASizePair : VLASizes)
+    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
+  PGO.assignRegionCounters(GlobalDecl(CD), F);
+  CapturedStmtInfo->EmitBody(*this, CD->getBody());
+  (void)LocalScope.ForceCleanup();
+  FinishFunction(CD->getBodyRBrace());
+  return F;
+}
+
 llvm::Function *
 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                     SourceLocation Loc) {
@@ -1476,9 +1659,8 @@
 /// Codegen lambda for appending distribute lower and upper bounds to outlined
 /// parallel function. This is necessary for combined constructs such as
 /// 'distribute parallel for'
-typedef llvm::function_ref<void(CodeGenFunction &,
-                                const OMPExecutableDirective &,
-                                llvm::SmallVectorImpl<llvm::Value *> &)>
+typedef llvm::function_ref<void(CodeGenFunction &,
+                                const OMPExecutableDirective &,
+                                const CapturedStmt &)>
     CodeGenBoundParametersTy;
 } // anonymous namespace

@@ -1571,14 +1753,18 @@
   OMPParallelScope Scope(CGF, S);
   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+  llvm::SmallVector<llvm::Type *, 16> CapturedVarsElemTypes;
+
   // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
   // lower and upper bounds with the pragma 'for' chunking mechanism.
   // The following lambda takes care of appending the lower and upper bound
   // parameters when necessary
-  CodeGenBoundParameters(CGF, S, CapturedVars);
-  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
-  CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
-                                              CapturedVars, IfCond, NumThreads);
+  CodeGenBoundParameters(CGF, S, *CS);
+  CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars,
+                                          CapturedVarsElemTypes);
+  CGF.CGM.getOpenMPRuntime().emitParallelCall(
+      CGF, S.getBeginLoc(), OutlinedFn, CapturedVars, CapturedVarsElemTypes,
+      IfCond, NumThreads);
 }

 static bool isAllocatableDecl(const VarDecl *VD) {
@@ -1594,7 +1780,7 @@
 static void emitEmptyBoundParameters(CodeGenFunction &,
                                      const OMPExecutableDirective &,
-                                     llvm::SmallVectorImpl<llvm::Value *> &) {}
+                                     const CapturedStmt &) {}

 static void emitOMPCopyinClause(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &S) {
@@ -3149,21 +3335,30 @@
 static void emitDistributeParallelForDistributeInnerBoundParams(
     CodeGenFunction &CGF, const OMPExecutableDirective &S,
-    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
+    const CapturedStmt &CS) {
   const auto &Dir = cast<OMPLoopDirective>(S);
+
+  // The first captured variable of the captured statement corresponds
+  // to inner lower bound.
+  VarDecl *PrevLBCapDecl = CS.captures().begin()->getCapturedVar();
+  // The second captured variable corresponds to the inner upper bound.
+  VarDecl *PrevUBCapDecl = std::next(CS.captures().begin())->getCapturedVar();
+
   LValue LB =
       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
   llvm::Value *LBCast =
       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)),
                                 CGF.SizeTy, /*isSigned=*/false);
-  CapturedVars.push_back(LBCast);
+  CGF.EmitStoreOfScalar(LBCast, CGF.GetAddrOfLocalVar(PrevLBCapDecl),
+                        /*Volatile=*/false, PrevLBCapDecl->getType());
+
   LValue UB =
       CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
-
   llvm::Value *UBCast =
       CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)),
                                 CGF.SizeTy, /*isSigned=*/false);
-  CapturedVars.push_back(UBCast);
+  CGF.EmitStoreOfScalar(UBCast, CGF.GetAddrOfLocalVar(PrevUBCapDecl),
+                        /*Volatile=*/false, PrevUBCapDecl->getType());
 }

 static void
@@ -5649,6 +5844,14 @@
       LValue IL =
           EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
+      // Emit previous upper, lower bound captured variables, if applicable.
+      if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+        EmitOMPHelperVar(*this,
+                         cast<DeclRefExpr>(S.getPrevLowerBoundVariable()));
+        EmitOMPHelperVar(*this,
+                         cast<DeclRefExpr>(S.getPrevUpperBoundVariable()));
+      }
+
       OMPPrivateScope LoopScope(*this);
       if (EmitOMPFirstprivateClause(S, LoopScope)) {
         // Emit implicit barrier to synchronize threads and avoid data races
@@ -6697,7 +6900,10 @@
   OMPTeamsScope Scope(CGF, S);
   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
-  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
+  llvm::SmallVector<llvm::Type *, 16> CapturedVarsElemTypes;
+
+  CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars,
+                                          CapturedVarsElemTypes);
   CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                            CapturedVars);
 }
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3349,8 +3349,14 @@
   llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
   llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
   Address GenerateCapturedStmtArgument(const CapturedStmt &S);
+  llvm::Function *
+  GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S,
+                                              SourceLocation Loc);
   llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                      SourceLocation Loc);
+  void GenerateOpenMPCapturedVarsAggregate(
+      const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars,
+      SmallVectorImpl<llvm::Type *> &CapturedVarsElemTypes);
   void GenerateOpenMPCapturedVars(const CapturedStmt &S,
                                   SmallVectorImpl<llvm::Value *> &CapturedVars);
   void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4445,12 +4445,22 @@
     Sema::CapturedParamNameType Params[] = {
         std::make_pair(".global_tid.", KmpInt32PtrTy),
         std::make_pair(".bound_tid.", KmpInt32PtrTy),
-        std::make_pair(".previous.lb.", Context.getSizeType().withConst()),
-        std::make_pair(".previous.ub.", Context.getSizeType().withConst()),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
+
+    VarDecl *PrevLBDecl =
+        buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(),
+                     ".captured.omp.previous.lb");
+    VarDecl *PrevUBDecl =
+        buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(),
+                     ".captured.omp.previous.ub");
+
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
                              Params);
+
+    MarkVariableReferenced(SourceLocation(), PrevLBDecl);
+    MarkVariableReferenced(SourceLocation(), PrevUBDecl);
+
     break;
   }
   case OMPD_target_teams_distribute_parallel_for:
@@ -4501,14 +4511,24 @@
     Sema::CapturedParamNameType ParamsParallel[] = {
         std::make_pair(".global_tid.", KmpInt32PtrTy),
         std::make_pair(".bound_tid.", KmpInt32PtrTy),
-        std::make_pair(".previous.lb.", Context.getSizeType().withConst()),
-        std::make_pair(".previous.ub.", Context.getSizeType().withConst()),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
+
+    VarDecl *PrevLBDecl =
+        buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(),
+                     ".captured.omp.previous.lb");
+    VarDecl *PrevUBDecl =
+        buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(),
+                     ".captured.omp.previous.ub");
+
     // Start a captured region for 'teams' or 'parallel'. Both regions have
     // the same implicit parameters.
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
                              ParamsParallel, /*OpenMPCaptureLevel=*/3);
+
+    MarkVariableReferenced(SourceLocation(), PrevLBDecl);
+    MarkVariableReferenced(SourceLocation(), PrevUBDecl);
+
     break;
   }
@@ -4546,14 +4566,24 @@
     Sema::CapturedParamNameType ParamsParallel[] = {
         std::make_pair(".global_tid.", KmpInt32PtrTy),
         std::make_pair(".bound_tid.", KmpInt32PtrTy),
-        std::make_pair(".previous.lb.", Context.getSizeType().withConst()),
-        std::make_pair(".previous.ub.", Context.getSizeType().withConst()),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
+
+    VarDecl *PrevLBDecl =
+        buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(),
+                     ".captured.omp.previous.lb");
+    VarDecl *PrevUBDecl =
+        buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(),
+                     ".captured.omp.previous.ub");
+
     // Start a captured region for 'teams' or 'parallel'. Both regions have
     // the same implicit parameters.
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
                              ParamsParallel, /*OpenMPCaptureLevel=*/1);
+
+    MarkVariableReferenced(SourceLocation(), PrevLBDecl);
+    MarkVariableReferenced(SourceLocation(), PrevUBDecl);
+
     break;
   }
   case OMPD_target_update:
@@ -9846,23 +9876,23 @@
       CombEUB =
          SemaRef.ActOnFinishFullExpr(CombEUB.get(), /*DiscardedValue*/ false);

-      const CapturedDecl *CD = cast<CapturedStmt>(AStmt)->getCapturedDecl();
-      // We expect to have at least 2 more parameters than the 'parallel'
-      // directive does - the lower and upper bounds of the previous schedule.
-      assert(CD->getNumParams() >= 4 &&
-             "Unexpected number of parameters in loop combined directive");
-
-      // Set the proper type for the bounds given what we learned from the
-      // enclosed loops.
-      ImplicitParamDecl *PrevLBDecl = CD->getParam(/*PrevLB=*/2);
-      ImplicitParamDecl *PrevUBDecl = CD->getParam(/*PrevUB=*/3);
-
-      // Previous lower and upper bounds are obtained from the region
-      // parameters.
-      PrevLB =
-          buildDeclRefExpr(SemaRef, PrevLBDecl, PrevLBDecl->getType(), InitLoc);
-      PrevUB =
-          buildDeclRefExpr(SemaRef, PrevUBDecl, PrevUBDecl->getType(), InitLoc);
+      const CapturedStmt *CS = cast<CapturedStmt>(AStmt);
+
+      // Refer to captured variables from the associated captured statement of
+      // the combined directive. First captured variable is the previous lower
+      // bound, second is the previous upper bound.
+ VarDecl *PrevLBCapDecl = CS->captures().begin()->getCapturedVar(); + assert(PrevLBCapDecl && + "Expected captured var for previous LB in combined directive"); + VarDecl *PrevUBCapDecl = + std::next(CS->captures().begin())->getCapturedVar(); + assert(PrevLBCapDecl && + "Expected captured var for previous UB in combined directive"); + + PrevLB = buildDeclRefExpr(SemaRef, PrevLBCapDecl, + PrevLBCapDecl->getType(), InitLoc); + PrevUB = buildDeclRefExpr(SemaRef, PrevUBCapDecl, + PrevUBCapDecl->getType(), InitLoc); } } diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c --- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c +++ b/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c @@ -35,231 +35,230 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_4:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_5:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_7:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_8:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_9:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_10:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_11:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_12:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_13:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_14:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_15:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_16:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_17:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_18:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_19:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_20:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_8]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_9]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_22:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_24:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_26:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_27:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_28:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_29:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_30:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_31:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_32:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_33:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_34:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_35:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_37:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_38:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_39:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_40:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_41:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_42:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_44:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_45:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_46:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_47:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_48:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_49:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_51:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_52:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_54:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_55:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_35]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_36]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_46]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_47]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_59:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_60:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_61:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_62:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_63:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_64:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_65:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_67:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_68:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_69:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_70:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_71:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_72:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_76:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_77:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_78:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_79:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_80:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_82:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_83:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_84:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_86:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_88:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral 
[[ADDR_89:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_90:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_91:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_92:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_93:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_94:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_95:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_96:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_97:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_98:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_99:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_77]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_78]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_88]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_89]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_101:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_102:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_106:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_107:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_108:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_109:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_110:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_111:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_112:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_113:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_114:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_115:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_116:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_117:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_118:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_119:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_120:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_121:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_122:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_124:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_125:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_126:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_127:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_128:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_129:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_130:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | 
`-IntegerLiteral [[ADDR_131:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_132:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_133:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_135:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_136:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_137:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_138:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_139:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_141:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_142:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_119]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_120]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_130]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_131]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_143:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_144:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_145:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_146:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_148:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_149:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_151:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_152:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPDistributeParallelForSimdDirective [[ADDR_154:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_155:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ConstantExpr [[ADDR_156:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_157:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: `-CapturedStmt [[ADDR_158:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_159:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_160:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_162:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_163:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_164:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_165:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_166:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_167:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_168:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_169:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_171:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_172:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_173:0x[a-z0-9]*]] col:14 used i 'int' cinit +// 
CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_174:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_176:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_178:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_179:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_180:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_181:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_182:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_184:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_185:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long'
-// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:31:1) *const restrict'
-// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit
-// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit
-// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit
-// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int'
+// CHECK-NEXT: | | |-BinaryOperator [[ADDR_186:0x[a-z0-9]*]] 'int' '<'
+// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_187:0x[a-z0-9]*]] 'int'
+// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_188:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int'
+// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_189:0x[a-z0-9]*]] 'int'
+// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_190:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int'
+// CHECK-NEXT: | | |-UnaryOperator [[ADDR_191:0x[a-z0-9]*]] 'int' postfix '++'
+// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_192:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int'
+// CHECK-NEXT: | | `-NullStmt [[ADDR_193:0x[a-z0-9]*]]
+// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_194:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_195:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_196:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:31:1) *const restrict'
+// CHECK-NEXT: | |-VarDecl [[ADDR_162]] col:12 used i 'int' cinit
+// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_163]] 'int' 0
+// CHECK-NEXT: | |-VarDecl [[ADDR_173]] col:14 used i 'int' cinit
+// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_174]] 'int' 0
+// CHECK-NEXT: | `-VarDecl [[ADDR_184]] col:16 used i 'int' cinit
+// CHECK-NEXT: | `-IntegerLiteral [[ADDR_185]] 'int' 0
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_198:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long'
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_200:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long'
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int'
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_202:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int'
+// CHECK-NEXT: `-DeclRefExpr [[ADDR_203:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int'
diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c
--- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c
+++ b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c
@@ -35,231 +35,230 @@
   ;
 }
-// CHECK: TranslationUnitDecl {{.*}} <>
-// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
-// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int'
-// CHECK-NEXT: | `-CompoundStmt {{.*}}
-// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}}
-// CHECK-NEXT: | `-CapturedStmt {{.*}}
-// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow
-// CHECK-NEXT: | | |-ForStmt {{.*}}
-// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_4:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_5:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_7:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_8:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_9:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_10:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_11:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_12:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_13:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_14:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_15:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_16:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_17:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_18:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_19:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_20:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_8]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_9]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_22:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_24:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_26:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_27:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_28:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_29:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_30:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_31:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_32:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_33:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_34:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_35:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_37:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_38:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_39:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_40:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_41:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_42:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_44:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_45:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_46:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_47:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_48:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_49:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_51:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_52:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_54:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_55:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_35]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_36]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_46]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_47]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_59:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_60:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_61:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_62:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_63:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_64:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_65:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_67:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_68:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_69:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_70:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_71:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_72:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_76:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_77:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_78:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_79:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_80:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_82:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_83:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_84:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_86:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_88:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_89:0x[a-z0-9]*]] 
'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_90:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_91:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_92:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_93:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_94:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_95:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_96:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_97:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_98:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_99:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_77]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_78]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_88]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_89]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_101:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_102:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_106:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_107:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_108:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_109:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_110:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_111:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_112:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_113:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_114:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_115:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_116:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_117:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_118:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_119:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_120:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_121:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_122:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_124:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_125:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_126:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_127:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_128:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_129:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_130:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | 
`-IntegerLiteral [[ADDR_131:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_132:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_133:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_135:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_136:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_137:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_138:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_139:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_141:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_142:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_119]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_120]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_130]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_131]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_143:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_144:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_145:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_146:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_148:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_149:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_151:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_152:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPDistributeParallelForDirective [[ADDR_154:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_155:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ConstantExpr [[ADDR_156:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_157:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: `-CapturedStmt [[ADDR_158:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_159:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_160:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_162:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_163:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_164:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_165:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_166:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_167:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_168:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_169:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_171:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_172:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_173:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | 
| `-IntegerLiteral [[ADDR_174:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_176:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_178:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_179:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_180:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_181:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_182:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_184:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_185:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long'
-// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict'
-// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit
-// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit
-// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit
-// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int'
+// CHECK-NEXT: | | |-BinaryOperator [[ADDR_186:0x[a-z0-9]*]] 'int' '<'
+// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_187:0x[a-z0-9]*]] 'int'
+// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_188:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int'
+// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_189:0x[a-z0-9]*]] 'int'
+// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_190:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int'
+// CHECK-NEXT: | | |-UnaryOperator [[ADDR_191:0x[a-z0-9]*]] 'int' postfix '++'
+// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_192:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int'
+// CHECK-NEXT: | | `-NullStmt [[ADDR_193:0x[a-z0-9]*]]
+// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_194:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_195:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_196:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict'
+// CHECK-NEXT: | |-VarDecl [[ADDR_162]] col:12 used i 'int' cinit
+// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_163]] 'int' 0
+// CHECK-NEXT: | |-VarDecl [[ADDR_173]] col:14 used i 'int' cinit
+// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_174]] 'int' 0
+// CHECK-NEXT: | `-VarDecl [[ADDR_184]] col:16 used i 'int' cinit
+// CHECK-NEXT: | `-IntegerLiteral [[ADDR_185]] 'int' 0
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_198:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long'
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_200:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long'
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int'
+// CHECK-NEXT: |-DeclRefExpr [[ADDR_202:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int'
+// CHECK-NEXT: `-DeclRefExpr [[ADDR_203:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int'
diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c
--- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c
+++ b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c
@@ -35,1926 +35,1965 @@
   ;
 }
-// CHECK: TranslationUnitDecl {{.*}} <>
-// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)'
-// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int'
-// CHECK-NEXT: | `-CompoundStmt {{.*}}
-// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}}
-// CHECK-NEXT: | 
|-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_11:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_12:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_13:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_14:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_16:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_17:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_18:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_19:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_20:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_21:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_22:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_24:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_25:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_26:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_28:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_29:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_33:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_34:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_37:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_38:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_41:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_43:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_44:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_46:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_47:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_48:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_49:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_51:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_52:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_53:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_54:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_55:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_59:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_60:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_61:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_10]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_44]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_45]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_46]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_47]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_48]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_49]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_63:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_64:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_65:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_66:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_67:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_68:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_69:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_70:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_71:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_72:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_76:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_77:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_78:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_80:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_81:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_82:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_83:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_84:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_86:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_87:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_88:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_89:0x[a-z0-9]*]] +// CHECK-NEXT: 
| | | | | | | | | |-DeclStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_91:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_92:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_93:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_94:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_95:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_96:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_97:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_99:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_101:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_102:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_106:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_107:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_108:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_109:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_110:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_111:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_112:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_113:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_114:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_115:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_116:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_117:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_118:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_119:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_120:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_121:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_126:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_127:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_129:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_130:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr 
[[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_131:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_132:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_133:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_134:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_137:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_138:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_139:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_141:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_142:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_143:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_144:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_145:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_74]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_75]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit 
.global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr 
[[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 
'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_125]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_126]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_127]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_128]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_129]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_130]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_118]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_88]] 'int' 
{{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_97]] 'int' 
postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_146:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_148:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_149:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_151:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_152:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_154:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_155:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_156:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_157:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_158:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_159:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_160:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_162:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_163:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_164:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_165:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_166:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_167:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_168:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_169:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_170:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// 
CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_171:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_172:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_173:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_174:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_176:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_177:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_178:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_179:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_180:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_181:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_182:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_183:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_184:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_185:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_186:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_188:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_189:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_190:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_191:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_192:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_193:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_194:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_195:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_196:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_197:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_198:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_199:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_200:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_201:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_202:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_203:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_204:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_205:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_206:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_207:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_208:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_209:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: 
| | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_211:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_212:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_213:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_214:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_215:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_216:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_217:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_218:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> 
-// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var 
[[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_220:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_221:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_222:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_227:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_228:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_229:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_231:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_232:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_233:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_162]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_163]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl 
{{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | 
`-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 
'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_211]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_212]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_213]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_214]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_215]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_216]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_217]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_218]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// 
CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_206]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | 
`-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: 
| | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_234:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_235:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_236:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_237:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_238:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_239:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_240:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_241:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_242:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_243:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_244:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_245:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_247:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_248:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_249:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_250:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_251:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_252:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_253:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_254:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_255:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_256:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_257:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_258:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | 
`-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_259:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_260:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_261:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_262:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_263:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_264:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_265:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_266:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_267:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_268:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_269:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_270:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_271:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_272:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_273:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_274:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_275:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_276:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_277:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_278:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_279:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_280:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_281:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_282:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_283:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_284:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_285:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_286:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_287:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_288:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_289:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_290:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_291:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_292:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_293:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_294:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_295:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_296:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_297:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: 
| | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_299:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_300:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_301:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_302:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_303:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_304:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_305:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_306:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> 
-// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var 
[[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_308:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_309:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_310:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_311:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_312:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_316:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_317:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_318:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_319:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_320:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_321:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_250]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_251]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl 
{{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | 
`-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 
'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_299]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_300]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_301]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_302]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_303]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_304]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_305]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_306]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// 
CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_294]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_296]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | 
`-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: 
| | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_322:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_323:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_324:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_325:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_326:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_327:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_328:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_329:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_330:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ConstantExpr [[ADDR_331:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_332:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_333:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_334:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_335:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_336:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_337:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_338:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_339:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_340:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_341:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_342:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_343:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_344:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_345:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_346:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_347:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_348:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: 
| | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_349:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_350:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_351:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_352:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_353:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_354:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_355:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_356:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_357:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_358:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_359:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_361:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_362:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_363:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_364:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_365:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_366:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_367:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_368:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | 
| | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_369:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_370:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_371:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_372:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_373:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_374:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_375:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | 
|-ImplicitParamDecl [[ADDR_376:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_377:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_378:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_379:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_380:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_381:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_382:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_384:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_385:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_386:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_387:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_388:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_389:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_390:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_391:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_392:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_393:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_394:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_395:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_396:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_397:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_398:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_399:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_400:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: 
| | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_401:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_402:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_403:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_404:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_405:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_406:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_407:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_408:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_409:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_410:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_411:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_412:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr 
[[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 31 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 31 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | 
| | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_413:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_415:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_416:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_417:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_418:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_419:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_420:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_421:0x[a-z0-9]*]] col:1 implicit .task_t. 'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_422:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_423:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_424:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_425:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_426:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_427:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_428:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_429:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_430:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_340]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_341]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 31 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 31 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | 
| | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_401]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_402]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_403]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_404]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_405]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_406]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_407]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_408]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_409]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_410]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_411]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_412]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | 
| | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_395]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_397]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// 
CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_431:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_432:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_433:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c --- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c +++ b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c @@ -35,1926 +35,1965 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_11:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_12:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_13:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_14:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl 
[[ADDR_16:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_17:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_18:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_19:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_20:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_21:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_22:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_24:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_25:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_26:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_28:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_29:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_33:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_34:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_37:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_38:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_41:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_43:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_44:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_46:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_47:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_48:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_49:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_51:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_52:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_53:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_54:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_55:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_59:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_60:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_61:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_10]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_44]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_45]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_46]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_47]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_48]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_49]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_63:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_64:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_65:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_66:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_67:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_68:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_69:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_70:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_71:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_72:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_76:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_77:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_78:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_80:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_81:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_82:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_83:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_84:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_86:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_87:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_88:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_89:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | 
| | | | | |-DeclStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_91:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_92:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_93:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_94:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_95:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_96:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_97:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_99:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_101:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_102:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_106:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_107:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_108:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_109:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_110:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_111:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_112:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_113:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_114:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_115:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_116:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_117:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_118:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_119:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_120:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_121:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_126:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_127:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_129:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_130:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 
'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_131:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_132:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_133:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_134:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_137:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_138:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_139:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_141:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_142:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_143:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_144:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_145:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_74]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_75]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit 
.global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 
'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | 
| | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_125]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_126]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_127]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_128]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_129]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_130]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_118]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var 
[[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// 
CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_146:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_148:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_149:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_151:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_152:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_154:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_155:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_156:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_157:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_158:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_159:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_160:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_162:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_163:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_164:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_165:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_166:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_167:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_168:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_169:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_170:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | 
| | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_171:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_172:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_173:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_174:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_176:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_177:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_178:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_179:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_180:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_181:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_182:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_183:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_184:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_185:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_186:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_188:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_189:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_190:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_191:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_192:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_193:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_194:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_195:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_196:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_197:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_198:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_199:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_200:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_201:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_202:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_203:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_204:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_205:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_206:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_207:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_208:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_209:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | 
| | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_211:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_212:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_213:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_214:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_215:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_216:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_217:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_218:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// 
CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 
'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_220:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_221:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_222:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_227:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_228:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_229:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_231:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_232:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_233:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_162]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_163]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} 
col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | 
`-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var 
[[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_211]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_212]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_213]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_214]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_215]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_216]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_217]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_218]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_206]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr 
[[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator 
[[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_234:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_235:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_236:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_237:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_238:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_239:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_240:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_241:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_242:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_243:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_244:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_245:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_247:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_248:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_249:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_250:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_251:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_252:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_253:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_254:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_255:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_256:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_257:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_258:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 
'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_259:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_260:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_261:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_262:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_263:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_264:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_265:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_266:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_267:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_268:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_269:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_270:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_271:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_272:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_273:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_274:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_275:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_276:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_277:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_278:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_279:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_280:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_281:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_282:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_283:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_284:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_285:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_286:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_287:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_288:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_289:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_290:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_291:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_292:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_293:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_294:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_295:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_296:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_297:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | 
| | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_299:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_300:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_301:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_302:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_303:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_304:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_305:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_306:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// 
CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 
'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_308:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_309:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_310:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_311:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_312:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_316:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_317:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_318:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_319:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_320:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_321:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_250]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_251]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} 
col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 31 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 31 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | 
`-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var 
[[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_299]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_300]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_301]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_302]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_303]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_304]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_305]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_306]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 31 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_294]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_296]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr 
[[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr 
[[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_322:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_323:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_324:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_325:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_326:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_327:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_328:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_329:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_330:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ConstantExpr [[ADDR_331:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_332:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_333:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_334:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_335:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_336:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_337:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_338:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_339:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_340:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_341:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_342:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_343:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_344:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_345:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_346:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_347:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_348:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_349:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_350:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_351:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_352:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_353:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_354:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_355:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_356:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_357:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_358:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_359:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_361:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_362:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_363:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_364:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_365:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_366:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_367:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_368:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_369:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_370:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_371:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_372:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_373:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_374:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_375:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl 
[[ADDR_376:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_377:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_378:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_379:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_380:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_381:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_382:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_384:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_385:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_386:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_387:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_388:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_389:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_390:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_391:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_392:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_393:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_394:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_395:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_396:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_397:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_398:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_399:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_400:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: 
| | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_401:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_402:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_403:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_404:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_405:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_406:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_407:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_408:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_409:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_410:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_411:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_412:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' 
{{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 31 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 31 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | 
|-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_413:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_415:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_416:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_417:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_418:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_419:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_420:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_421:0x[a-z0-9]*]] col:1 implicit .task_t. 'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_422:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_423:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_424:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_425:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_426:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_427:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_428:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_429:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_430:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_340]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_341]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 31 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 31 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 31 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | 
|-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_401]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_402]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_403]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_404]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_405]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_406]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_407]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_408]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_409]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_410]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_411]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_412]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_395]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_397]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// 
CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_431:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_432:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_433:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c --- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c +++ b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c @@ -40,2144 +40,2143 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:3:1, line:8:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:8:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_11:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_12:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_13:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_14:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | 
| | | | | | | | |-DeclStmt [[ADDR_16:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_17:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_18:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_19:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_20:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_22:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_24:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_26:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_28:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_29:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_33:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_34:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_37:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_38:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_41:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_44:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_46:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_47:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_48:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_49:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_50:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_51:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_52:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_54:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_55:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_56:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_57:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_58:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_59:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_60:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_61:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_63:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_64:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_65:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_66:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_67:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_68:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_69:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_70:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_71:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_72:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_73:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_10]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_11]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_43]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_44]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_45]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_46]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_47]] col:23 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_48]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_49]] > col:3 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_50]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_51]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_52]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_53]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_54]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_55]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_56]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_57]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_58]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_60]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_61]] <> 'int' 1 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_74:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_75:0x[a-z0-9]*]] line:10:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_76:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_77:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_78:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_80:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_82:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_83:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_84:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_85:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_86:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_88:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_89:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_91:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_92:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_93:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_94:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_95:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_96:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | 
| | | |-ImplicitCastExpr [[ADDR_97:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_99:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_100:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_101:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_102:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_103:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_104:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_105:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_106:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_107:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_108:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_109:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_110:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_111:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_112:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_113:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_114:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_115:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_116:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_117:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_118:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_119:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_120:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_121:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_126:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_127:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_129:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_130:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_131:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_132:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_133:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_137:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_138:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_139:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_131]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// 
CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_140:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_141:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_142:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_143:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_144:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_145:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_146:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_148:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_149:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_150:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_151:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_152:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_153:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_154:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_155:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_156:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_157:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_158:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_159:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_160:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_161:0x[a-z0-9]*]] col:1 implicit .task_t. 'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_162:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_163:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_164:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_165:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_166:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_167:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_168:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_86]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_87]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_88]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_131]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_133]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_134]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_136]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_137]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_138]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_139]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_131]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// 
CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_140]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_141]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_142]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_143]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_144]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_145]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_146]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_147]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_148]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_149]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_150]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_152]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_153]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_169:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_171:0x[a-z0-9]*]] line:18:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_172:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_173:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_174:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_175:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_176:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_178:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_179:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_180:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_181:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_182:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_184:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_185:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_186:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_188:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_189:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_190:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_191:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_192:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_193:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_194:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_195:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_196:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_198:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_200:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_202:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_203:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_204:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_205:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_206:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_207:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_208:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_209:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_211:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_212:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_213:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_214:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_215:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_216:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_217:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_218:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_220:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_221:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_222:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_227:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_228:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_229:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_231:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_232:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_233:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_234:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_235:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_236:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_237:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_238:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 
'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_230]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 
'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_239:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_240:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_241:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_242:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_243:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_244:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_245:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_247:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_248:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_249:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_250:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_251:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_252:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_253:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_254:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_255:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_256:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_257:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_258:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_259:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_260:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_261:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_262:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_263:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_264:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_265:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_266:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_267:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_182]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_183]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_184]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_185]] 'int' +// CHECK-NEXT: | | | | |-value: Int 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_186]] 'int' 1 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_187]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_230]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | 
|-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_232]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_233]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_234]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_235]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_236]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_237]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_238]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | 
| | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_230]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 
'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_239]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_240]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_241]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_242]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_243]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_244]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_245]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_247]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_248]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_249]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_251]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_252]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_268:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_269:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_270:0x[a-z0-9]*]] line:26:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_271:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_272:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_273:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_274:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_275:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_276:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_277:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_278:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_279:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_280:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_281:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_282:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_283:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_284:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_285:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_286:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_287:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_288:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_289:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_290:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_291:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_292:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_293:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_294:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_295:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_296:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_297:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_299:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_300:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_301:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_302:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_303:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_304:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_305:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_306:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_308:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_309:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_310:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_311:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_312:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_316:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_317:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_318:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_319:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_320:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_321:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_322:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_323:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_324:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_325:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_326:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_327:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_328:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_329:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_330:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_331:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_332:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_333:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_334:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_335:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_336:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_337:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 
'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_329]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_330]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 
'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_338:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_339:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_340:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_341:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_342:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_343:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_344:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | | |-ParenExpr [[ADDR_345:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_346:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_347:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_348:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | | `-ParenExpr [[ADDR_349:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_350:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_351:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_352:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_353:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_354:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_355:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_356:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_357:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_358:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_359:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_361:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_362:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_363:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_364:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_365:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_366:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_367:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_368:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_369:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_370:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_371:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_372:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_373:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_374:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_375:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_376:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_377:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_378:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_379:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_380:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_381:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_281]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_282]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_283]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_284]] 'int' +// CHECK-NEXT: | | | | |-value: Int 2 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_285]] 'int' 2 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_286]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_329]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_330]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | 
|-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_331]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_332]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_333]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_334]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_335]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_336]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_337]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | 
| | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict'
+// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
+// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit
+// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0
+// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit
+// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0
+// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long'
+// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long'
+// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int'
+// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int'
+// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict'
+// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 'const int *const restrict'
+// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
+// CHECK-NEXT: | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition
+// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit
+// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &'
+// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &'
+// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_329]] col:23 implicit 'int &'
+// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_330]] col:25 implicit 'int &'
+// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_289]] <> nothrow
+// CHECK-NEXT: | | | |-ForStmt [[ADDR_290]]
+// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_291]]
+// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit
+// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0
 // CHECK-NEXT: | | | | |-<<>>
-// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<'
-// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int'
-// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++'
-// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int'
-// CHECK-NEXT: | | | | `-ForStmt {{.*}}
-// CHECK-NEXT: | | | | |-DeclStmt {{.*}}
-// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit
-// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0
+// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_294]] 'int' '<'
+// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int'
+// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int'
+// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int'
+// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int'
+// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++'
+// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int'
+// CHECK-NEXT: | | | | `-ForStmt [[ADDR_301]]
+// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_302]]
+// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit
+// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0
 // CHECK-NEXT: | | | | |-<<>>
-// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<'
-// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int'
-// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++'
-// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int'
-// CHECK-NEXT: | | | | `-NullStmt {{.*}}
-// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict'
-// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict'
-// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long'
-// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long'
-// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict'
-// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit
-// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit
-// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int'
-// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int'
-// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int'
-// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int'
-// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long'
-// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-'
-// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*'
-// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long'
-// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/'
-// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-'
-// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int'
-// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+'
-// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-'
-// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0
-// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1
-// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1
-// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1
-// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long'
-// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/'
-// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-'
-// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int'
-// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.'
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_338]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_339]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_340]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_341]] > 'long' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_342]] 'long' '*' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_343]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_344]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_345]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_346]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_347]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_348]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_349]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_350]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_351]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_353]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_354]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_355]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_356]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_357]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_358]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_359]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_361]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_362]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_364]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_365]] <> 'long' +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_366]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_382:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_384:0x[a-z0-9]*]] line:34:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_385:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_386:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_387:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_388:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetDirective [[ADDR_389:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_390:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_391:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_392:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_393:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_394:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_395:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_396:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_397:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_398:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-OMPCollapseClause [[ADDR_399:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-ConstantExpr [[ADDR_400:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_401:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | `-CapturedStmt [[ADDR_402:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_403:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_404:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_405:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_406:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_407:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_408:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_409:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | 
| | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_410:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_411:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_412:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_413:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_415:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_416:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_417:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_418:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_419:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_420:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_421:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_422:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_423:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_424:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_425:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_426:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_427:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_428:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_429:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_430:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_431:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl 
{{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_432:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_433:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_434:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_435:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_436:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_437:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_438:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_439:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_440:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_441:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_442:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_443:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_444:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_445:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_446:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_447:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_448:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_449:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_450:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_451:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_452:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_453:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_454:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_455:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_456:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_457:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_458:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_459:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | 
| | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_460:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_461:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_462:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_463:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_464:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_465:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_466:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_467:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_468:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 
'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_457]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_458]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | 
| | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 
'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_469:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_470:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-OMPCapturedExprDecl [[ADDR_471:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_472:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_473:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_474:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_475:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_476:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_477:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_478:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_479:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_480:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_481:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_482:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_483:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_484:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_485:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_486:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_487:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_488:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_489:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_490:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_491:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_492:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_493:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_494:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_495:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_496:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_497:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_498:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_499:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_500:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_501:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_502:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_503:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_504:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_505:0x[a-z0-9]*]] col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_506:0x[a-z0-9]*]] col:1 implicit .task_t. 'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_507:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_508:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_509:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_510:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_511:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_512:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_513:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_514:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_515:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_397]] <> nothrow +// CHECK-NEXT: | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_398]] +// CHECK-NEXT: | | |-OMPCollapseClause [[ADDR_399]] +// CHECK-NEXT: | | | `-ConstantExpr [[ADDR_400]] 'int' +// CHECK-NEXT: | | | |-value: Int 2 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_401]] 'int' 2 +// CHECK-NEXT: | | `-CapturedStmt [[ADDR_402]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_457]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_458]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_424]] 
'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_437]] 'int' 
postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_460]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_461]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_462]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_463]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_464]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_465]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_466]] col:23 implicit 'int &' +// CHECK-NEXT: | | |-FieldDecl [[ADDR_467]] col:25 implicit 'int &' +// CHECK-NEXT: | | `-FieldDecl [[ADDR_468]] col:27 implicit 'int &' +// CHECK-NEXT: | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// 
CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_457]] col:23 implicit 'int &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_458]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | 
`-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_469]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_470]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-OMPCapturedExprDecl [[ADDR_471]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | `-BinaryOperator [[ADDR_472]] > 'long' '-' +// CHECK-NEXT: | |-BinaryOperator [[ADDR_473]] 'long' '*' +// CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_474]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_475]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_476]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_477]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_478]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_479]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_480]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_481]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_482]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_484]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_485]] 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_486]] 'int' '/' +// CHECK-NEXT: | | |-ParenExpr [[ADDR_487]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_488]] 'int' '-' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_489]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_490]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | `-ParenExpr [[ADDR_491]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_492]] > 'int' '+' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_493]] 'int' '-' +// CHECK-NEXT: | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_495]] <> 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_496]] <> 'long' +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_497]] <> 'int' 1 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_516:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_517:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_518:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c --- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c +++ b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c @@ -40,2144 +40,2143 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:3:1, line:8:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:8:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_11:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_12:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_13:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_14:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_16:0x[a-z0-9]*]] +// 
CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_17:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_18:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_19:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_20:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_22:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_24:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_26:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_28:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_29:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_33:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_34:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_37:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_38:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_41:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_44:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_46:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_47:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_48:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_49:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_50:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_51:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_52:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_54:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_55:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_56:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_57:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_58:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_59:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_60:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_61:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_63:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_64:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_65:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_66:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_67:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_68:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_69:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_70:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_71:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_72:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_73:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_10]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_11]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_43]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_44]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_45]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_46]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_47]] col:3 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_48]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_49]] > col:3 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_50]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_51]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_52]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_53]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_54]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_55]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_56]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_57]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_58]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_60]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_61]] <> 'int' 1 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_74:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_75:0x[a-z0-9]*]] line:10:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_76:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_77:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_78:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_80:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_82:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_83:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_84:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_85:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_86:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_88:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_89:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_91:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_92:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_93:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_94:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_95:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_96:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | 
| |-ImplicitCastExpr [[ADDR_97:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_99:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_100:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_101:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_102:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_103:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_104:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_105:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_106:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_107:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_108:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_109:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_110:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_111:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_112:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_113:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_114:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_115:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_116:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_117:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_118:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_119:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_120:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_121:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_126:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_127:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_129:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_130:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_131:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_132:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_133:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_137:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_138:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_139:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | 
| | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_131]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | 
| | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_140:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_141:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_142:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_143:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_144:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_145:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_146:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_148:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_149:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_150:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_151:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_152:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_153:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_154:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_155:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_156:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_157:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_158:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_159:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_160:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_161:0x[a-z0-9]*]] col:1 implicit .task_t. 'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_162:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_163:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_164:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_165:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_166:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_167:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_168:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_86]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_87]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_88]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral 
{{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_131]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_133]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_134]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_136]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_137]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_138]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_139]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 
'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_131]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: 
| | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_140]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_141]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_142]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_143]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_144]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_145]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_146]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_147]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_148]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_149]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_150]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_152]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_153]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_169:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_171:0x[a-z0-9]*]] line:18:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_172:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_173:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_174:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_175:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_176:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_178:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_179:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_180:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_181:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_182:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_184:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_185:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_186:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_188:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_189:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_190:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_191:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_192:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_193:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_194:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_195:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_196:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_198:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_200:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_202:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_203:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_204:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_205:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_206:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_207:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_208:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_209:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_211:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_212:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_213:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_214:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_215:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_216:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_217:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_218:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_220:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_221:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_222:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_227:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_228:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_229:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_231:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_232:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_233:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_234:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_235:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_236:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_237:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_238:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_230]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' 
+// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_239:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_240:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_241:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_242:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_243:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_244:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_245:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_247:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_248:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_249:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_250:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_251:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_252:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_253:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_254:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_255:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_256:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_257:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_258:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_259:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_260:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_261:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_262:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_263:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_264:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_265:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_266:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_267:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_182]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_183]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_184]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_185]] 'int' +// CHECK-NEXT: | | | | |-value: Int 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_186]] 'int' 1 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_187]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_230]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | |-DeclStmt 
[[ADDR_203]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_232]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_233]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_234]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_235]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_236]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_237]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_238]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_230]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit 
+// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_239]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_240]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_241]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_242]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_243]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_244]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_245]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_247]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_248]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_249]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_251]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_252]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_268:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_269:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_270:0x[a-z0-9]*]] line:26:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_271:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_272:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_273:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_274:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_275:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_276:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_277:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_278:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_279:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_280:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_281:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_282:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_283:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_284:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_285:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_286:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_287:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_288:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_289:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_290:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_291:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_292:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_293:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_294:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_295:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_296:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_297:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_299:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_300:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_301:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_302:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_303:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_304:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_305:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_306:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_308:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_309:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_310:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_311:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_312:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_316:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_317:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_318:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_319:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_320:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_321:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_322:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_323:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_324:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_325:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_326:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_327:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_328:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_329:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_330:0x[a-z0-9]*]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_331:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_332:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_333:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_334:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_335:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_336:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_337:0x[a-z0-9]*]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_329]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_330]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' 
+// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_338:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_339:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_340:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_341:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_342:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_343:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_344:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | | |-ParenExpr [[ADDR_345:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_346:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_347:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_348:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | | `-ParenExpr [[ADDR_349:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_350:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_351:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_352:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_353:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_354:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_355:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_356:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_357:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_358:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_359:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_361:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_362:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_363:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_364:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_365:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_366:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_367:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_368:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_369:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_370:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_371:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_372:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_373:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_374:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_375:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_376:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_377:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_378:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_379:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_380:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_381:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_281]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_282]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_283]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_284]] 'int' +// CHECK-NEXT: | | | | |-value: Int 2 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_285]] 'int' 2 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_286]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_329]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_330]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | |-DeclStmt 
[[ADDR_302]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_331]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_332]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_333]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_334]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_335]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_336]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_337]] col:5 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_329]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_330]] col:5 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit 
+// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_338]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_339]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_340]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_341]] > 'long' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_342]] 'long' '*' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_343]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_344]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_345]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_346]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_347]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_348]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_349]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_350]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_351]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_353]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_354]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_355]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_356]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_357]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_358]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_359]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_361]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_362]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_364]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_365]] <> 'long' +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_366]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_382:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_384:0x[a-z0-9]*]] line:34:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_385:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_386:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_387:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_388:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetDirective [[ADDR_389:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_390:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_391:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_392:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_393:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_394:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_395:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_396:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_397:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_398:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-OMPCollapseClause [[ADDR_399:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-ConstantExpr [[ADDR_400:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_401:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | `-CapturedStmt [[ADDR_402:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_403:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_404:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_405:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_406:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_407:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_408:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_409:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | 
| | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_410:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_411:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_412:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_413:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_415:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_416:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_417:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_418:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_419:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_420:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_421:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_422:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_423:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_424:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_425:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_426:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_427:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_428:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_429:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_430:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_431:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl 
{{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_432:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_433:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_434:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_435:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_436:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_437:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_438:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_439:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_440:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_441:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_442:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_443:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_444:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_445:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_446:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_447:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_448:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_449:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_450:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_451:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_452:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_453:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_454:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_455:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_456:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_457:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_458:0x[a-z0-9]*]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_459:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | 
|-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_460:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_461:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_462:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_463:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_464:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_465:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_466:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_467:0x[a-z0-9]*]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_468:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit 
+// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_457]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_458]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | 
`-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 
'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_469:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_470:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-OMPCapturedExprDecl [[ADDR_471:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_472:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_473:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_474:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_475:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_476:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_477:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_478:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_479:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_480:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_481:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_482:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_483:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_484:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_485:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_486:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_487:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_488:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_489:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_490:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_491:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_492:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_493:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_494:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_495:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_496:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_497:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_498:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_499:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_500:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_501:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_502:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_503:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_504:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_505:0x[a-z0-9]*]] col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_506:0x[a-z0-9]*]] col:1 implicit .task_t. 'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_507:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_508:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_509:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_510:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_511:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_512:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_513:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_514:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_515:0x[a-z0-9]*]] <> Implicit 31 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_397]] <> nothrow +// CHECK-NEXT: | |-OMPTeamsDistributeParallelForDirective [[ADDR_398]] +// CHECK-NEXT: | | |-OMPCollapseClause [[ADDR_399]] +// CHECK-NEXT: | | | `-ConstantExpr [[ADDR_400]] 'int' +// CHECK-NEXT: | | | |-value: Int 2 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_401]] 'int' 2 +// CHECK-NEXT: | | `-CapturedStmt [[ADDR_402]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: 
| | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_457]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_458]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// 
CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// 
CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_460]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_461]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_462]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_463]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_464]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_465]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_466]] col:3 implicit 'int &' +// CHECK-NEXT: | | |-FieldDecl [[ADDR_467]] col:5 implicit 'int &' +// CHECK-NEXT: | | `-FieldDecl [[ADDR_468]] col:27 implicit 'int &' +// CHECK-NEXT: | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | 
`-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_457]] col:3 implicit 'int &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_458]] col:5 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: 
| | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_469]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_470]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-OMPCapturedExprDecl [[ADDR_471]] > col:3 implicit used .capture_expr. 
'long' +// CHECK-NEXT: | `-BinaryOperator [[ADDR_472]] > 'long' '-' +// CHECK-NEXT: | |-BinaryOperator [[ADDR_473]] 'long' '*' +// CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_474]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_475]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_476]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_477]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_478]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_479]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_480]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_481]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_482]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_484]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_485]] 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_486]] 'int' '/' +// CHECK-NEXT: | | |-ParenExpr [[ADDR_487]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_488]] 'int' '-' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_489]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_490]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | `-ParenExpr [[ADDR_491]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_492]] > 'int' '+' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_493]] 'int' '-' +// CHECK-NEXT: | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_495]] <> 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_496]] <> 'long' +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_497]] <> 'int' 1 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_516:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_517:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_518:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' diff --git a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c --- a/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c +++ b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c @@ -26,7 +26,8 @@ // Verify cases when OpenMP target's and host's long-double semantics differ. 
// OMP-TARGET-LABEL: define internal void @.omp_outlined.(
-// OMP-TARGET: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr
+// OMP-TARGET: %[[PTR:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.anon, ptr %[[ADDR:.+]], i32 0, i32 1
+// OMP-TARGET: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[PTR]], align 8
// OMP-TARGET: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8
// OMP-TARGET: call void @foo_ld(ppc_fp128 noundef %[[V3]])
diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
--- a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
+++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c
@@ -23,12 +23,12 @@
// CHECK-AMD-NEXT: entry:
// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-AMD-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-AMD-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8, addrspace(5)
// CHECK-AMD-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr
-// CHECK-AMD-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK-AMD-NEXT: [[OMP_OUTLINED_ARG_AGG__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_OUTLINED_ARG_AGG_]] to ptr
// CHECK-AMD-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
@@ -38,13 +38,15 @@
// CHECK-AMD-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-AMD: user_code.entry:
// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
-// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
-// CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_CASTED_ASCAST]], align 4
-// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
-// CHECK-AMD-NEXT: [[TMP4:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8
+// CHECK-AMD-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]], i32 0, i32 0
+// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8
+// CHECK-AMD-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]], i32 0, i32 1
+// CHECK-AMD-NEXT: [[TMP5:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8
+// CHECK-AMD-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
// CHECK-AMD-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
// CHECK-AMD-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
-// CHECK-AMD-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR2:[0-9]+]]
+// CHECK-AMD-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]]) #[[ATTR2:[0-9]+]]
// CHECK-AMD-NEXT: call void @__kmpc_target_deinit(ptr
addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) // CHECK-AMD-NEXT: ret void // CHECK-AMD: worker.exit: @@ -52,12 +54,12 @@ // // // CHECK-AMD-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-AMD-NEXT: entry: // CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) @@ -67,13 +69,15 @@ // CHECK-AMD-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8, addrspace(5) // CHECK-AMD-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5) -// CHECK-AMD-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-AMD-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) +// CHECK-AMD-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8, addrspace(5) // CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr // CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// CHECK-AMD-NEXT: [[__CONTEXT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__CONTEXT_ADDR]] to ptr +// CHECK-AMD-NEXT: [[N_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N]] to ptr // CHECK-AMD-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr // CHECK-AMD-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr @@ -83,129 +87,139 @@ // CHECK-AMD-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr // CHECK-AMD-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr // CHECK-AMD-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURED_OMP_PREVIOUS_LB]] to ptr +// CHECK-AMD-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURED_OMP_PREVIOUS_UB]] to ptr // CHECK-AMD-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr -// CHECK-AMD-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr -// CHECK-AMD-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK-AMD-NEXT: [[OMP_OUTLINED_ARG_AGG__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OMP_OUTLINED_ARG_AGG_]] to ptr // CHECK-AMD-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 // CHECK-AMD-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 -// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-AMD-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 // CHECK-AMD-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 -// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-AMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] // CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK-AMD: omp.precond.then: // CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 // CHECK-AMD-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-AMD-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] 
to ptr), i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-AMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK-AMD-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK-AMD-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-AMD: cond.true: -// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[COND_END:%.*]] // CHECK-AMD: cond.false: -// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[COND_END]] // CHECK-AMD: cond.end: -// CHECK-AMD-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK-AMD-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK-AMD-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-AMD: omp.inner.for.cond: -// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK-AMD-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP11]], [[ADD]] +// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK-AMD-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK-AMD-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-AMD: omp.inner.for.body: -// CHECK-AMD-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK-AMD-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP17]], ptr 
[[N_CASTED_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP19:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 -// CHECK-AMD-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP14]] to ptr -// CHECK-AMD-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 -// CHECK-AMD-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 -// CHECK-AMD-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP16]] to ptr -// CHECK-AMD-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK-AMD-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 -// CHECK-AMD-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP18]] to ptr -// CHECK-AMD-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK-AMD-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 -// CHECK-AMD-NEXT: store ptr [[TMP19]], ptr [[TMP26]], align 8 -// CHECK-AMD-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK-AMD-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) +// CHECK-AMD-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK-AMD-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK-AMD-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]], i32 0, i32 0 +// CHECK-AMD-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK-AMD-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]], i32 0, i32 1 +// CHECK-AMD-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK-AMD-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]], i32 0, i32 2 +// CHECK-AMD-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK-AMD-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]], i32 0, i32 3 +// CHECK-AMD-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-AMD-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK-AMD-NEXT: [[TMP29:%.*]] = addrspacecast ptr addrspace(5) [[DOTTMP_OUTLINED_AGG_ARG]] to ptr +// CHECK-AMD-NEXT: [[TMP30:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK-AMD-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[TMP29]], ptr [[TMP30]]) +// CHECK-AMD-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG__ASCAST]], align 8 +// CHECK-AMD-NEXT: store [[STRUCT_ANON_0]] [[TMP32]], ptr [[TMP31]], 
align 8 +// CHECK-AMD-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP34]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[TMP31]]) +// CHECK-AMD-NEXT: call void @__kmpc_free_shared(ptr [[TMP30]], i64 32) // CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-AMD: omp.inner.for.inc: -// CHECK-AMD-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK-AMD-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK-AMD-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK-AMD-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK-AMD-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK-AMD-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK-AMD-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK-AMD-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK-AMD-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK-AMD: cond.true10: -// CHECK-AMD-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[COND_END12:%.*]] // CHECK-AMD: cond.false11: -// CHECK-AMD-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[COND_END12]] // CHECK-AMD: cond.end12: -// CHECK-AMD-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK-AMD-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK-AMD-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 -// 
CHECK-AMD-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-AMD: omp.inner.for.end: // CHECK-AMD-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-AMD: omp.loop.exit: -// CHECK-AMD-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP41]]) +// CHECK-AMD-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP47]]) // CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]] // CHECK-AMD: omp.precond.end: // CHECK-AMD-NEXT: ret void // // // CHECK-AMD-LABEL: define {{[^@]+}}@__omp_outlined__.1 -// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR1]] { +// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-AMD-NEXT: entry: // CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) -// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) @@ -218,10 +232,10 @@ // CHECK-AMD-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5) // CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr // CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr -// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr -// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr -// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr -// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// CHECK-AMD-NEXT: [[__CONTEXT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__CONTEXT_ADDR]] to ptr 
+// CHECK-AMD-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURED_OMP_PREVIOUS_LB]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURED_OMP_PREVIOUS_UB]] to ptr +// CHECK-AMD-NEXT: [[N_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N]] to ptr // CHECK-AMD-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr // CHECK-AMD-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr @@ -234,71 +248,79 @@ // CHECK-AMD-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr // CHECK-AMD-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 // CHECK-AMD-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 -// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-AMD-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK-AMD-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK-AMD-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-AMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK-AMD-NEXT: store i32 [[TMP6]], ptr [[N_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[N_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], 0 // CHECK-AMD-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 -// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK-AMD: omp.precond.then: // CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CONV:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK-AMD-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK-AMD-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 // CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK-AMD-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP7]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) -// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 -// CHECK-AMD-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP15]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// CHECK-AMD-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-AMD: omp.inner.for.cond: -// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[CONV5:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP10]] +// CHECK-AMD-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CONV5:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP18]] // CHECK-AMD-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-AMD: omp.inner.for.body: -// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK-AMD-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// 
CHECK-AMD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-AMD-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP13:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK-AMD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK-AMD-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK-AMD-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK-AMD-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK-AMD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[IDXPROM]] +// CHECK-AMD-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 // CHECK-AMD-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-AMD: omp.body.continue: // CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-AMD: omp.inner.for.inc: -// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 -// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-AMD-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK-AMD-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4 // CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-AMD: omp.inner.for.end: // CHECK-AMD-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-AMD: omp.loop.exit: -// CHECK-AMD-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 -// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP18]]) +// CHECK-AMD-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP26]]) // CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]] // CHECK-AMD: omp.precond.end: // CHECK-AMD-NEXT: ret void diff --git a/clang/test/OpenMP/bug54082.c b/clang/test/OpenMP/bug54082.c --- a/clang/test/OpenMP/bug54082.c +++ b/clang/test/OpenMP/bug54082.c @@ -68,44 +68,47 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[X_TRAITS:%.*]] = alloca [1 x %struct.omp_alloctrait_t], align 16 // CHECK-NEXT: [[X_ALLOC:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull [[X_TRAITS]]) #[[ATTR5:[0-9]+]] // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) [[X_TRAITS]], ptr noundef nonnull align 16 dereferenceable(16) @__const.foo.x_traits, i64 16, i1 false) // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[X_ALLOC]]) #[[ATTR5]] // CHECK-NEXT: [[CALL:%.*]] = call i64 @omp_init_allocator(i64 noundef 0, i32 noundef 1, ptr noundef nonnull 
[[X_TRAITS]]) #[[ATTR5]]
// CHECK-NEXT: store i64 [[CALL]], ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3:![0-9]+]]
-// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB2:[0-9]+]], i32 1, ptr nonnull @.omp_outlined., ptr nonnull [[X_ALLOC]])
+// CHECK-NEXT: store ptr [[X_ALLOC]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @[[GLOB2:[0-9]+]], i32 1, ptr nonnull @.omp_outlined., ptr nonnull [[OMP_OUTLINED_ARG_AGG_]])
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[X_ALLOC]]) #[[ATTR5]]
// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[X_TRAITS]]) #[[ATTR5]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK-SAME: (ptr noalias nocapture noundef readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture readnone [[DOTBOUND_TID_:%.*]], ptr nocapture noundef nonnull readonly align 8 dereferenceable(8) [[X_ALLOC:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK-SAME: (ptr noalias nocapture noundef readonly [[DOTGLOBAL_TID_:%.*]], ptr noalias nocapture readnone [[DOTBOUND_TID_:%.*]], ptr noalias nocapture noundef readonly [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT]], align 8, !tbaa [[TBAA6]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[DOTOMP_LB]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8:![0-9]+]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[DOTOMP_UB]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]]
+// CHECK-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[DOTOMP_STRIDE]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]]
+// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]]
// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]]
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA6]]
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP1]] to ptr
-// CHECK-NEXT: [[DOTX__VOID_ADDR:%.*]] = tail call ptr @__kmpc_alloc(i32 [[TMP0]], i64 8, ptr [[CONV]])
-// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr nonnull [[DOTOMP_IS_LAST]], ptr nonnull [[DOTOMP_LB]], ptr nonnull [[DOTOMP_UB]], ptr nonnull [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 1023)
-// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]]
-// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
-// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_ALLOC]], align 8, !tbaa [[TBAA3]]
-// CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP3]] to ptr
-//
CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTX__VOID_ADDR]], ptr [[CONV5]]) +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP0]], align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP2]] to ptr +// CHECK-NEXT: [[DOTX__VOID_ADDR:%.*]] = tail call ptr @__kmpc_alloc(i32 [[TMP1]], i64 8, ptr [[CONV]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr nonnull @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr nonnull [[DOTOMP_IS_LAST]], ptr nonnull [[DOTOMP_LB]], ptr nonnull [[DOTOMP_UB]], ptr nonnull [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP3]], i32 1023) +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr nonnull @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP0]], align 8, !tbaa [[TBAA3]] +// CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTX__VOID_ADDR]], ptr [[CONV5]]) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DOTOMP_IS_LAST]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DOTOMP_STRIDE]]) #[[ATTR5]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DOTOMP_UB]]) #[[ATTR5]] diff --git a/clang/test/OpenMP/bug60602.cpp b/clang/test/OpenMP/bug60602.cpp --- a/clang/test/OpenMP/bug60602.cpp +++ b/clang/test/OpenMP/bug60602.cpp @@ -235,27 +235,30 @@ // CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[N_CASTED]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define internal void @.omp_outlined. -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -268,79 +271,83 @@ // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK: omp.precond.then: // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD]], ptr [[J3]], align 4 -// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[J3]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 
-// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[J3]], align 4 -// CHECK-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[IDXPROM6]] -// CHECK-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX7]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[J3]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[J3]], align 4 +// CHECK-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM6]] +// CHECK-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX7]], align 4 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK: omp.body.continue: // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK: omp.inner.for.end: // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) // CHECK-NEXT: br label [[OMP_PRECOND_END]] // CHECK: omp.precond.end: // CHECK-NEXT: ret void @@ -352,27 +359,30 @@ // CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[N_CASTED]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define internal void @.omp_outlined..1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -382,94 +392,111 @@ // CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], -2 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], -2 // CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 3 // CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK: omp.precond.then: // CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] // CHECK-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, 
ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP12]], 1 -// CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP11]], [[ADD]] +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP17]], 1 +// CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP16]], [[ADD]] // CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..2, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP19]], ptr [[TMP20]]) +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP21]], [[TMP22]] +// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP32]], [[TMP33]] // CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK: omp.inner.for.end: // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP24]]) +// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP35]]) // CHECK-NEXT: br label [[OMP_PRECOND_END]] // CHECK: omp.precond.end: // CHECK-NEXT: ret void // // // CHECK-LABEL: define internal void @.omp_outlined..2 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -482,89 +509,97 @@ // CHECK-NEXT: [[J4:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP1]], -2 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP10]], -2 // CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 3 // CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // CHECK-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK: omp.precond.then: // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]] +// 
CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP17]], [[TMP18]] // CHECK-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP14]], 1 -// CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP13]], [[ADD]] +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], 1 +// CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP22]], [[ADD]] // CHECK-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP15]], 3 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP24]], 3 // CHECK-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD7]], ptr [[J4]], align 4 -// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[J4]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP18]], 2 -// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[J4]], align 4 -// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[J4]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP25]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP27]], 2 +// CHECK-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[J4]], align 4 +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK-NEXT: store i32 [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK: omp.body.continue: // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1 +// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK: omp.inner.for.end: // CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]]) +// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]]) // CHECK-NEXT: br label [[OMP_PRECOND_END]] // CHECK: omp.precond.end: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -84,6 +84,7 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -104,39 +105,46 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I24:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_36:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_37:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_38:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[ARGV_ADDR]], ptr [[ARGC_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // 
CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -144,8 +152,8 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -158,28 +166,28 @@ // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_3]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_4]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_4]], ptr [[DOTOMP_SECTIONS_LB_1]], ptr [[DOTOMP_SECTIONS_UB_2]], ptr [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_5]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] // CHECK1: omp.inner.for.cond6: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK1: omp.inner.for.body8: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case9: -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT10:%.*]], label 
[[DOTCANCEL_CONTINUE11:%.*]] // CHECK1: .cancel.exit10: // CHECK1-NEXT: br label [[CANCEL_EXIT19:%.*]] // CHECK1: cancel.exit: @@ -188,9 +196,9 @@ // CHECK1: .cancel.continue11: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT15]] // CHECK1: .omp.sections.case12: -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]] // CHECK1: .cancel.exit13: // CHECK1-NEXT: br label [[CANCEL_EXIT19]] // CHECK1: .cancel.continue14: @@ -198,8 +206,8 @@ // CHECK1: .omp.sections.exit15: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK1: omp.inner.for.inc16: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[INC17]], ptr [[DOTOMP_SECTIONS_IV_5]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND6]] // CHECK1: omp.inner.for.end18: @@ -207,57 +215,57 @@ // CHECK1-NEXT: br label [[CANCEL_CONT20:%.*]] // CHECK1: cancel.cont20: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB5]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB22]], ptr [[DOTCAPTURE_EXPR_21]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp slt i32 0, [[TMP27]] // CHECK1-NEXT: br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND26:%.*]] // CHECK1: omp.inner.for.cond26: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP27:%.*]] = icmp sle i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]] // CHECK1: omp.inner.for.body28: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP36]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I24]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load float, ptr @flag, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr @flag, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP37]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]] // CHECK1: .cancel.exit29: // CHECK1-NEXT: br label [[CANCEL_EXIT34:%.*]] // CHECK1: cancel.exit19: @@ -272,8 +280,8 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC31:%.*]] // CHECK1: omp.inner.for.inc31: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP40]], 1 // CHECK1-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND26]] // CHECK1: omp.inner.for.end33: @@ -288,72 +296,77 @@ // CHECK1-NEXT: br label [[CANCEL_CONT35]] // CHECK1: cancel.cont35: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP39:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP39]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP39]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[TMP41:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP41]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP41]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_36]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_37]]) // CHECK1-NEXT: store i32 0, ptr [[R]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, ptr [[ARGC_ADDR]], ptr [[R]]) -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: ret i32 [[TMP42]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_38]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_38]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[R]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_38]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: ret i32 [[TMP46]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr @flag, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr @flag, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP5]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP4]], i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP7]], i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP11]]) // CHECK1-NEXT: br label [[RETURN:%.*]] // CHECK1: .cancel.continue: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, 
ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP10]] to i8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP11]], i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP13]] to i8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 0 // CHECK1-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX1]], align 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[RETURN]] // CHECK1: .cancel.continue3: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds ptr, ptr [[TMP18]], i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[ARRAYIDX4]], align 8 -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP20]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds ptr, ptr [[TMP21]], i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[ARRAYIDX4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP20]] // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV7]], ptr [[ARRAYIDX5]], align 1 // CHECK1-NEXT: br label [[RETURN]] @@ -408,10 +421,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca 
ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -419,34 +433,36 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] 
// CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -454,25 +470,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -480,43 +497,45 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: 
[[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue3: @@ -524,27 
+543,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -554,112 +572,114 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[R3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[R3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: store i32 0, ptr [[R]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: 
store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP17]], i32 2) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP20]], i32 2) +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[R3]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[R]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[R]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[R3]], ptr [[TMP25]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB7:[0-9]+]], i32 [[TMP27]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[R]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB7:[0-9]+]], i32 [[TMP30]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB7]], i32 [[TMP27]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[R]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB7]], i32 [[TMP30]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP33]] monotonic, align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[R]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP36]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -715,7 +735,10 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_49:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_50:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -931,12 +954,16 @@ // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP36]], i32 0, i32 0 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB14]]) // CHECK3-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], ptr [[TMP36]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_49]]) // CHECK3-NEXT: store i32 0, ptr [[R]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..3, ptr [[ARGC_ADDR]], ptr [[R]]) -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: ret i32 [[TMP39]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_50]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP39]], align 8 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_50]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[R]], ptr [[TMP40]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_50]]) +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK3-NEXT: ret i32 [[TMP41]] // // // CHECK3-LABEL: define {{[^@]+}}@main..omp_par @@ -1049,10 +1076,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1060,34 +1088,36 @@ // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB17:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0 -// CHECK3-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +// CHECK3-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: @@ -1095,8 +1125,8 @@ // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1112,10 +1142,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1123,44 +1154,46 @@ // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 
1, i32 1) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 -// CHECK3-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 1 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) +// CHECK3-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] // 
CHECK3: .omp.sections.case2.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] // CHECK3: .omp.sections.case2.section.after: @@ -1170,8 +1203,8 @@ // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1187,12 +1220,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1202,107 +1234,109 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[R3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[R3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[R]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB27:[0-9]+]]) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM6]], i32 2) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[R3]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[R]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[R]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[R3]], ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: 
[[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[R]], ptr [[TMP22]], align 8 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[R]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD12]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM11]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP23]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[R]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] diff --git a/clang/test/OpenMP/cancellation_point_codegen.cpp b/clang/test/OpenMP/cancellation_point_codegen.cpp --- a/clang/test/OpenMP/cancellation_point_codegen.cpp +++ b/clang/test/OpenMP/cancellation_point_codegen.cpp @@ -86,6 +86,7 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -106,45 +107,52 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED42:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED42:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_43:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_44:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_45:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[ARGV_ADDR]], ptr [[ARGC_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: @@ -152,8 +160,8 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -166,28 +174,28 @@ // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_5]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_6]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_6]], ptr [[DOTOMP_SECTIONS_LB_3]], ptr [[DOTOMP_SECTIONS_UB_4]], ptr [[DOTOMP_SECTIONS_ST_5]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 -// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 1 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_3]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_SECTIONS_IV_7]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND8:%.*]] // CHECK1: omp.inner.for.cond8: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END22:%.*]] // CHECK1: omp.inner.for.body10: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT19:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT19:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE11:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE14:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case11: -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT12:%.*]], label [[DOTCANCEL_CONTINUE13:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT12:%.*]], label [[DOTCANCEL_CONTINUE13:%.*]] // CHECK1: .cancel.exit12: // CHECK1-NEXT: br label [[CANCEL_EXIT23:%.*]] // CHECK1: cancel.exit: @@ -196,15 +204,15 @@ // CHECK1: .cancel.continue13: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT19]] // CHECK1: .omp.sections.case14: -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT15:%.*]], label [[DOTCANCEL_CONTINUE16:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTCANCEL_EXIT15:%.*]], label [[DOTCANCEL_CONTINUE16:%.*]] // CHECK1: .cancel.exit15: // CHECK1-NEXT: br label [[CANCEL_EXIT23]] // CHECK1: .cancel.continue16: -// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTCANCEL_EXIT17:%.*]], label [[DOTCANCEL_CONTINUE18:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTCANCEL_EXIT17:%.*]], label [[DOTCANCEL_CONTINUE18:%.*]] // CHECK1: .cancel.exit17: // CHECK1-NEXT: br label [[CANCEL_EXIT23]] // CHECK1: .cancel.continue18: @@ -212,8 +220,8 @@ // CHECK1: .omp.sections.exit19: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] // CHECK1: omp.inner.for.inc20: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: [[INC21:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[INC21:%.*]] = add nsw i32 [[TMP28]], 
1 // CHECK1-NEXT: store i32 [[INC21]], ptr [[DOTOMP_SECTIONS_IV_7]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND8]] // CHECK1: omp.inner.for.end22: @@ -221,62 +229,62 @@ // CHECK1-NEXT: br label [[CANCEL_CONT24:%.*]] // CHECK1: cancel.cont24: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB26:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB26]], ptr [[DOTCAPTURE_EXPR_25]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP27:%.*]] = icmp slt i32 0, [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP27:%.*]] = icmp slt i32 0, [[TMP31]] // CHECK1-NEXT: br i1 [[CMP27]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 -// CHECK1-NEXT: [[CMP29:%.*]] = icmp sgt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: br i1 [[CMP29]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], [[COND_TRUE]] ], [ [[TMP34]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE]] ], [ [[TMP36]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP35]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] // CHECK1: omp.inner.for.cond30: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP31:%.*]] = icmp sle i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp sle i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END39:%.*]] // CHECK1: omp.inner.for.body32: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP40]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I28]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 -// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTCANCEL_EXIT33:%.*]], label [[DOTCANCEL_CONTINUE34:%.*]] +// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTCANCEL_EXIT33:%.*]], label [[DOTCANCEL_CONTINUE34:%.*]] // CHECK1: .cancel.exit33: // CHECK1-NEXT: br label [[CANCEL_EXIT40:%.*]] // CHECK1: cancel.exit23: // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[CANCEL_CONT24]] // CHECK1: .cancel.continue34: -// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 -// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTCANCEL_EXIT35:%.*]], label [[DOTCANCEL_CONTINUE36:%.*]] +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[DOTCANCEL_EXIT35:%.*]], label [[DOTCANCEL_CONTINUE36:%.*]] // CHECK1: .cancel.exit35: // CHECK1-NEXT: br label [[CANCEL_EXIT40]] // CHECK1: .cancel.continue36: @@ -284,8 +292,8 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC37:%.*]] // CHECK1: omp.inner.for.inc37: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK1-NEXT: store i32 [[ADD38]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND30]] // CHECK1: omp.inner.for.end39: @@ -300,54 +308,57 @@ // CHECK1-NEXT: br label [[CANCEL_CONT41]] // CHECK1: cancel.cont41: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP44:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP44]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP46:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP44]])
-// CHECK1-NEXT: [[TMP47:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..3)
-// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP47]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP49:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP47]])
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4)
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..5)
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[ARGC_ADDR]])
-// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: ret i32 [[TMP50]]
+// CHECK1-NEXT: [[TMP46:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.)
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP46]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP46]])
+// CHECK1-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..3)
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP49]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP51:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP49]])
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_43]])
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_44]])
+// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_45]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP52]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_45]])
+// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK1-NEXT: ret i32 [[TMP53]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP3]], i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP6]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: br label [[RETURN:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP3]], i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP6]], i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: br label [[RETURN]] // CHECK1: .cancel.continue2: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[TMP1]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP10]] to i8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP11]], i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP13]] to i8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 0 // CHECK1-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX3]], align 1 // CHECK1-NEXT: br label [[RETURN]] // CHECK1: return: @@ -423,7 +434,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -454,10 +465,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -465,40 +477,42 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// 
CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: @@ -506,25 +520,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: 
omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -532,49 +547,51 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE3:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case3: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] // CHECK1: .cancel.exit4: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue5: @@ -582,26 +599,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: 
omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -614,67 +631,69 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB5]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB5]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP16]], i32 2) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP18]], i32 2) +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP20]], i32 2) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT6:%.*]], label [[DOTCANCEL_CONTINUE7:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP22]], i32 2) +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTCANCEL_EXIT6:%.*]], label [[DOTCANCEL_CONTINUE7:%.*]] // CHECK1: .cancel.exit6: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue7: @@ -682,21 +701,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5]], i32 [[TMP25]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: cancel.exit: // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5]], i32 [[TMP27]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: cancel.exit: +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] diff --git a/clang/test/OpenMP/debug-info-complex-byval.cpp b/clang/test/OpenMP/debug-info-complex-byval.cpp --- 
a/clang/test/OpenMP/debug-info-complex-byval.cpp +++ b/clang/test/OpenMP/debug-info-complex-byval.cpp @@ -15,45 +15,37 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, ptr [[B]], align 4, !dbg [[DBG14:![0-9]+]] -// CHECK1-NEXT: store { float, float } [[TMP0]], ptr [[B_CASTED]], align 4, !dbg [[DBG14]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_CASTED]], align 8, !dbg [[DBG14]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]), !dbg [[DBG14]] -// CHECK1-NEXT: ret void, !dbg [[DBG15:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG16:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG29:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG14:![0-9]+]] +// CHECK1-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0, !dbg [[DBG15:![0-9]+]] +// CHECK1-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4, !dbg [[DBG15]] +// CHECK1-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1, !dbg [[DBG15]] +// CHECK1-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4, !dbg [[DBG15]] +// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG14]] +// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG14]] +// CHECK1-NEXT: store float [[B_REAL]], ptr [[DOTREALP]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: store float [[B_IMAG]], ptr [[DOTIMAGP]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG14]] +// CHECK1-NEXT: ret void, !dbg [[DBG17:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG30:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG18:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]]
+// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]]
-// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG38:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG38]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[B_ADDR]], align 4, !dbg [[DBG38]]
-// CHECK1-NEXT: call void @.omp_outlined._debug__(ptr [[TMP0]], ptr [[TMP1]], <2 x float> [[TMP2]]) #[[ATTR4:[0-9]+]], !dbg [[DBG38]]
-// CHECK1-NEXT: ret void, !dbg [[DBG38]]
+// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META32:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]]
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG34:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG34]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load { float, float }, ptr [[TMP1]], align 4, !dbg [[DBG34]]
+// CHECK1-NEXT: store { float, float } [[TMP2]], ptr [[B]], align 4, !dbg [[DBG34]]
+// CHECK1-NEXT: ret void, !dbg [[DBG35:![0-9]+]]
//
diff --git a/clang/test/OpenMP/debug-info-openmp-array.cpp b/clang/test/OpenMP/debug-info-openmp-array.cpp
--- a/clang/test/OpenMP/debug-info-openmp-array.cpp
+++ b/clang/test/OpenMP/debug-info-openmp-array.cpp
@@ -20,6 +20,7 @@
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]]
@@ -31,20 +32,24 @@
// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG17]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]]
// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]]
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[M_ADDR]], i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG26:![0-9]+]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG27:![0-9]+]]
-// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP3]]), !dbg [[DBG27]]
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG26:![0-9]+]]
+// CHECK1-NEXT: store ptr [[M_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG26]]
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG26]]
+// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8, !dbg [[DBG26]]
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG26]]
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP5]], align 8, !dbg [[DBG26]]
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG26]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG27:![0-9]+]]
+// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP6]]), !dbg [[DBG27]]
// CHECK1-NEXT: ret void, !dbg [[DBG27]]
//
//
-// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG28:![0-9]+]] {
+// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG28:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CEN_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -56,122 +61,93 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: store ptr [[CEN]], ptr [[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[CEN_ADDR]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG44:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG44]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG44]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG47:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG47]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_1]], metadata [[META46]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG47]] -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0, !dbg [[DBG44]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG44]] -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG44]] -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG44]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: call 
void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_1]], metadata [[META45]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0, !dbg [[DBG47:![0-9]+]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] // CHECK1-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG47]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]], !dbg [[DBG44]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG44]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]], !dbg [[DBG47]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG43]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG51:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG44]] -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I3]], metadata [[META48]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG44]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG44]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG55:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG44]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]], !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I3]], metadata [[META48]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG43]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]], !dbg [[DBG51]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG51]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG44]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] // CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG51]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] // CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG51]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG51]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ], !dbg [[DBG51]] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] 
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG44]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG44]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG44]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]], !dbg [[DBG47]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1, !dbg [[DBG49]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1, !dbg [[DBG49]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG49]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !dbg [[DBG49]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG56:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG58:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG59:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]], !dbg [[DBG59]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX]], align 4, !dbg [[DBG60:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG61:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG57:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG58:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]], !dbg [[DBG58]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !dbg [[DBG59:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG60:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG55]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1, !dbg [[DBG44]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG55]], !llvm.loop [[LOOP62:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !dbg 
[[DBG47]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG55]] +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG43]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !dbg [[DBG55]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP21]]), !dbg [[DBG63:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !dbg [[DBG43]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP25]]), !dbg [[DBG62:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG43]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void, !dbg [[DBG64:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3]] !dbg [[DBG65:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CEN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] -// CHECK1-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] -// CHECK1-NEXT: store ptr [[CEN]], ptr [[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[CEN_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG72:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG72]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG72]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG72]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG72]] -// CHECK1-NEXT: call void @.omp_outlined._debug__(ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]], i64 [[TMP1]], ptr 
[[TMP6]]) #[[ATTR4:[0-9]+]], !dbg [[DBG72]] -// CHECK1-NEXT: ret void, !dbg [[DBG72]] +// CHECK1-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // diff --git a/clang/test/OpenMP/debug_threadprivate_copyin.c b/clang/test/OpenMP/debug_threadprivate_copyin.c --- a/clang/test/OpenMP/debug_threadprivate_copyin.c +++ b/clang/test/OpenMP/debug_threadprivate_copyin.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // This testcase checks emission of debug info for threadprivate variables // present in any clause of OpenMP construct. @@ -6,13 +7,6 @@ // RUN: %clang_cc1 -debug-info-kind=constructor -x c -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics -// CHECK: define internal void @.omp_outlined._debug__( -// CHECK: call void @llvm.dbg.declare(metadata ptr %.global_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %.bound_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %nt.addr -// CHECK: store ptr %gbl_dynamic_int, ptr %gbl_dynamic_int.addr, align 8 -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_dynamic_int.addr -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_static_int.addr extern int printf(const char *, ...); extern void omp_set_num_threads(int); @@ -24,6 +18,33 @@ #pragma omp threadprivate(gbl_dynamic_int) +// CHECK-LABEL: @main( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[NT:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OFFSET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[NT]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[NT]], align 4, !dbg [[DBG24]] +// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[OFFSET]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] +// CHECK-NEXT: store i32 10, ptr [[OFFSET]], align 4, !dbg [[DBG26]] +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_dynamic_int), !dbg [[DBG27:![0-9]+]] +// CHECK-NEXT: store i32 55, ptr [[TMP0]], align 4, !dbg [[DBG28:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_static_int), !dbg [[DBG29:![0-9]+]] +// CHECK-NEXT: store i32 77, ptr [[TMP1]], align 4, !dbg [[DBG30:![0-9]+]] +// CHECK-NEXT: call void @omp_set_num_threads(i32 noundef 4), !dbg [[DBG31:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG32:![0-9]+]] +// CHECK-NEXT: store ptr [[NT]], ptr [[TMP2]], align 8, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP4:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_dynamic_int), !dbg [[DBG33:![0-9]+]] +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP6:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_static_int), !dbg [[DBG35:![0-9]+]] +// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8, !dbg [[DBG32]] +// CHECK-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG32]] +// CHECK-NEXT: ret i32 0, !dbg [[DBG36:![0-9]+]] +// int main() { int nt = 0; int offset = 10; diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -28,7 +28,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -36,9 +37,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -46,20 +52,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR7]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/declare_target_constexpr_codegen.cpp b/clang/test/OpenMP/declare_target_constexpr_codegen.cpp --- a/clang/test/OpenMP/declare_target_constexpr_codegen.cpp +++ b/clang/test/OpenMP/declare_target_constexpr_codegen.cpp @@ -16,8 +16,15 @@ public: static constexpr double pi = 3.141592653589793116; //. +// CHECK: @__omp_rtl_debug_kind = weak_odr hidden constant i32 1 +// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 +// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 +// CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 +// CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 // CHECK: @_ZN1A2piE = linkonce_odr constant double 0x400921FB54442D18, comdat, align 8 +// CHECK: @"__ZN1A2piE$ref" = internal constant ptr @_ZN1A2piE // CHECK: @_ZL9anotherPi = internal constant double 3.140000e+00, align 8 +// CHECK: @"__ZL9anotherPi$ref" = internal constant ptr @_ZL9anotherPi // CHECK: @llvm.compiler.used = appending global [2 x ptr] [ptr @"__ZN1A2piE$ref", ptr @"__ZL9anotherPi$ref"], section "llvm.metadata" //. A() { ; } @@ -36,5 +43,13 @@ // +//. +// CHECK: !0 = !{i32 1, !"_ZL9anotherPi", i32 0, i32 1} +// CHECK: !1 = !{i32 1, !"_ZN1A2piE", i32 0, i32 0} +// CHECK: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !3 = !{i32 7, !"openmp", i32 50} +// CHECK: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK: !5 = !{!"clang version 17.0.0"} +//. //// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: // CHECK: {{.*}} diff --git a/clang/test/OpenMP/declare_variant_construct_codegen_1.c b/clang/test/OpenMP/declare_variant_construct_codegen_1.c --- a/clang/test/OpenMP/declare_variant_construct_codegen_1.c +++ b/clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -72,13 +72,13 @@ { vxv(v1, v2, v3, N); } -// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void ({{.+}})* bitcast (void (i32*, i32*, %struct.anon{{[\.0-9]*}}*)* [[PARALLEL_REGION:@.+]] to void return 0; } // CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) -// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 1, void ({{.+}})* bitcast (void (i32*, i32*, %struct.anon{{[\.0-9]*}}*)* [[TARGET_REGION:@.+]] to void // CK1: define internal void [[TARGET_REGION]]( // CK1: call void @t_vxv @@ -167,11 +167,11 @@ { test_base(v1, v2, v3, 0); } -// CK2: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[PARALLEL_REGION:@.+]] to void +// CK2: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon{{[\.0-9]*}}*)* [[PARALLEL_REGION:@.+]] to void } // CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}}) -// CK2: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[TARGET_REGION_1:@.+]] to void +// CK2: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 1, void ({{.+}})* bitcast (void (i32*, i32*, %struct.anon{{[\.0-9]*}}*)* [[TARGET_REGION_1:@.+]] to void // CK2: define internal void [[TARGET_REGION_1]]( // CK2: call void @test_teams diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -205,23 +205,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -231,79 +237,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], 
align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -396,23 +404,29 @@ // 
CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -422,79 +436,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -587,23 +603,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -613,96 +635,98 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] 
-// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: 
omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -783,17 +807,20 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -806,59 +833,61 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK1-NEXT: store i8 [[CONV10]], ptr [[I5]], align 
1 @@ -866,16 +895,16 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -950,17 +979,20 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -970,70 +1002,72 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] 
= add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK1-NEXT: ret void // // @@ -1133,23 +1167,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1159,75 +1199,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1320,23 +1362,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1346,75 +1394,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 
[[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1507,23 +1557,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1533,92 +1589,94 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// 
CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, 
!llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] // CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1699,17 +1757,20 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[A_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -1722,59 +1783,61 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK3-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ]
+// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]]
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
-// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
+// CHECK3-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1
+// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]]
// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8
// CHECK3-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1
@@ -1782,16 +1845,16 @@
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]])
+// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -1866,17 +1929,20 @@
// CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4
// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]])
+// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1886,70 +1952,72 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: ret void // // @@ -1967,23 +2035,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1993,79 +2067,81 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2076,23 +2152,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2102,79 +2184,81 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2185,23 +2269,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2211,96 +2301,98 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, 
ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK17-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2308,17 +2400,20 @@ // CHECK17-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[A_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -2331,59 +2426,61 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK17-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK17-NEXT: store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK17-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK17-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 
[[TMP4]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK17-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK17-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1 @@ -2391,16 +2488,16 @@ // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -2410,17 +2507,20 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2430,70 +2530,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK17-NEXT: ret void // // @@ -2504,23 +2606,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2530,75 +2638,77 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2609,23 +2719,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2635,75 +2751,77 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 
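The CHECK19 hunks above all follow the same pattern: the call site now allocates one anonymous aggregate (%struct.anon), stores the address of each captured variable into a field, and passes that single pointer to __kmpc_fork_teams with an argument count of 1; the outlined function then recovers each capture with a getelementptr/load pair. Below is a minimal C++ sketch of that shape, with illustrative names and a loop chosen only to resemble the MUL/SUB arithmetic in the checks; it is not the test's actual source or the patch's code.

// Hedged illustration of the aggregate calling convention verified above.
struct anon {                              // stands in for %struct.anon
  float **a, **b, **c, **d;                // each field holds a captured variable's address slot
};

// Stand-in for @.omp_outlined..1; real codegen reaches it through
// __kmpc_fork_teams(loc, /*nargs=*/1, fn, &ctx) rather than a direct call.
static void omp_outlined(int * /*gtid*/, int * /*btid*/, anon *ctx) {
  // One load per field replaces the former one-parameter-per-capture signature.
  float *a = *ctx->a, *b = *ctx->b, *c = *ctx->c, *d = *ctx->d;
  for (int i = 32000000; i > 33; i -= 7)   // loop shape resembling the IR above
    a[i] = b[i] * c[i] * d[i];
}

void caller(float *a, float *b, float *c, float *d) {
  anon ctx{&a, &b, &c, &d};                // the omp_outlined_arg_agg_ alloca
  int gtid = 0, btid = 0;
  omp_outlined(&gtid, &btid, &ctx);        // stands in for the runtime fork
}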
-// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2714,23 +2832,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2740,92 +2864,94 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: 
[[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK19-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: 
[[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK19-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] // CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK19-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2833,17 +2959,20 @@ // CHECK19-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[A_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -2856,59 +2985,61 @@ // CHECK19-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK19-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK19-NEXT: store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK19-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK19-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // 
CHECK19-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK19-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1 @@ -2916,16 +3047,16 @@ // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -2935,17 +3066,20 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2955,69 +3089,71 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK19-NEXT: ret void // diff --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp @@ -176,25 +176,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
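The distribute_firstprivate_codegen.cpp hunks exercise the same convention with firstprivate captures: the outlined body first pulls each captured address out of the context aggregate and then copy-initializes a local private from the pointee before the distribute loop runs. A hedged sketch of that copy-in step follows, with illustrative types and names rather than the test's actual ones.

// Hedged sketch of the firstprivate copy-in verified by the CHECK1 hunks below.
struct anon_fp {          // mirrors %struct.anon for g, g1, svar, sfvar
  double *g;
  double *g1;
  int *svar;
  float *sfvar;
};

static void omp_outlined_fp(int * /*gtid*/, int * /*btid*/, anon_fp *ctx) {
  // Each firstprivate gets a fresh local initialized from the original;
  // these copies correspond to the load/store pairs in the CHECK1 lines.
  double g = *ctx->g;
  double g1 = *ctx->g1;
  int svar = *ctx->svar;
  float sfvar = *ctx->sfvar;
  // ... the distribute loop then uses only the local copies ...
  (void)g; (void)g1; (void)svar; (void)sfvar;
}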
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -203,104 +209,106 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD9]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK1-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD5]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK1-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4 +// 
CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -340,6 +348,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -353,20 +362,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -375,104 +389,106 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 
4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD9]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK3-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, 
i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD5]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK3-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK3-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: ret void // // @@ -637,6 +653,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], 
ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -646,21 +663,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -669,117 +692,119 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = 
alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX12]], ptr align 4 [[TMP20]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 
4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -965,6 +990,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -973,20 +999,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -995,112 +1026,114 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// 
CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 
[[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP23]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 
x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1309,6 +1342,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1318,21 +1352,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1341,115 +1381,117 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP21]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP20]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1635,6 +1677,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1643,20 +1686,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1665,110 +1713,112 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] 
= load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp @@ -166,25 +166,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -193,99 +199,101 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP27]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP28]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, ptr [[TMP29]], align 8 +// CHECK1-NEXT: store volatile double [[TMP30]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP32]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -327,6 +335,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -340,20 +349,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -362,99 +376,101 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // 
CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 
0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 4 -// CHECK3-NEXT: store volatile double [[TMP25]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP27]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP28]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[TMP29]], align 4 +// CHECK3-NEXT: store volatile double [[TMP30]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP32]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -622,6 +638,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -631,21 +648,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -654,33 +677,35 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -690,99 +715,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label 
[[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr 
align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP32]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -968,6 +993,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 
[[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -976,20 +1002,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -998,30 +1029,32 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1031,97 +1064,97 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void 
@_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP29]] +// 
CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP30]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br 
i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1331,6 +1364,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1340,21 +1374,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1363,33 +1403,35 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1399,97 +1441,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP23]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr 
[[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], 
ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done13: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP32]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done15: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // @@ -1675,6 +1717,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1683,20 +1726,25 @@ // CHECK11-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1705,30 +1753,32 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1738,95 +1788,95 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr 
[[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr 
[[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP21]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP29]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP30]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -801,23 +801,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -827,93 +833,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -924,110 +944,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1040,23 +1064,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1066,93 +1096,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1163,110 +1207,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// 
CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1280,25 +1328,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1308,120 +1363,134 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1432,110 +1501,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr 
[[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1548,23 +1621,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1574,93 +1653,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load 
i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 
dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1671,110 +1764,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label 
[[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// 
CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]]
// CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8
// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]])
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1
// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]])
+// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
// CHECK1-NEXT: ret void
@@ -1788,25 +1885,32 @@
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8
// CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
@@ -1817,102 +1921,115 @@
// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] =
load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]])
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64
+// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64
+// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1923,131 +2040,137 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK1-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK1-NEXT: store double [[ADD13]], 
ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -2060,23 +2183,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2086,93 +2215,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull 
align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2183,99 +2326,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], 
ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group 
[[ACC_GRP12]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK1-NEXT: store double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2296,25 +2443,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2325,102 +2479,115 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], 
align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2431,101 +2598,107 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP42]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 8, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2579,23 +2752,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2605,91 +2784,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2700,105 +2893,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], 
align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -2811,23 +3008,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2837,91 +3040,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2932,105 +3149,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr 
[[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// 
CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 
// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3044,25 +3265,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3072,118 +3300,132 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3194,105 +3436,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr 
[[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3305,23 +3551,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // 
CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3331,91 +3583,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3426,105 +3692,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store 
ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// 
CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3538,25 +3808,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3567,100 +3844,113 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3671,124 +3961,130 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: 
omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX8]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds 
[[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 4
+// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP43]], align 4
+// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 4
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 4
// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]])
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1
+// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP46]], 1
// CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK3: omp.dispatch.inc:
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
+// CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP47]], [[TMP48]]
// CHECK3-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP38]], [[TMP39]]
+// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP49]], [[TMP50]]
// CHECK3-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]])
+// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
// CHECK3-NEXT: ret void
@@ -3801,23 +4097,29 @@
// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4
// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]])
+// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
@@ -3827,91 +4129,105 @@
// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-//
CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3922,94 +4238,98 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP13]] -// 
CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] // CHECK3-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds 
[[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4030,25 +4350,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -4059,100 +4386,113 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4163,96 +4503,102 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_19:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // 
CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK3-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP32]], 
align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP42]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4906,23 +5252,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4932,93 +5284,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5031,98 +5397,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5135,23 +5505,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5161,93 +5537,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 
dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5260,98 +5650,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = 
sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5365,25 +5759,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5393,120 +5794,134 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// 
CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5519,98 +5934,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
[[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: 
[[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5623,23 +6042,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5649,93 +6074,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5748,98 +6187,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5853,25 +6296,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -5882,102 +6332,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5990,119 +6453,125 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr 
[[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store double [[ADD13]], ptr [[ARRAYIDX15]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -6115,23 +6584,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6141,93 +6616,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6240,87 +6729,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -6341,25 +6834,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr 
[[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6370,102 +6870,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr 
[[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6478,89 +6991,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, 
ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// 
CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -7204,23 +7723,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7230,93 +7755,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7329,111 +7868,115 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP21]], i32 2) -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP30]], i32 2) +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK9: .cancel.continue: -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP29]] -// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] @@ -7448,23 +7991,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7474,93 +8023,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7573,98 +8136,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load 
i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -7678,25 +8245,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7706,120 +8280,134 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: 
[[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7832,98 +8420,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, 
align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 
-// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
-// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -7936,23 +8528,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7962,93 +8560,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr 
[[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull 
align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8061,98 +8673,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -8166,25 +8782,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8195,102 +8818,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// 
CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8303,119 +8939,125 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, 
ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX15]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw 
i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -8428,23 +9070,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8454,93 +9102,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8553,87 +9215,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -8654,25 +9320,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store 
i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8683,102 +9356,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// 
CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] 
= load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8791,89 +9477,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, 
ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -9527,23 +10219,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9553,91 +10251,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9650,93 +10362,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 
4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -9749,23 +10465,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9775,91 +10497,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9872,93 +10608,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 
4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -9972,25 +10712,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10000,118 +10747,132 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// 
CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10124,93 +10885,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10223,23 +10988,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10249,91 +11020,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10346,93 +11131,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr 
[[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10446,25 +11235,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10475,100 +11271,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10581,112 +11390,118 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 
4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// 
CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10699,23 +11514,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10725,91 +11546,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10822,82 +11657,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -10918,25 +11757,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10947,100 +11793,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr 
[[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11053,84 +11912,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: 
omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11774,23 +12639,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11800,91 +12671,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11897,106 +12782,110 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 
[[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP21]], i32 2) -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP30]], i32 2) +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: // CHECK11-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK11: .cancel.continue: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP29]] -// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] +// 
CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: cancel.exit: -// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK11: omp.precond.end: // CHECK11-NEXT: br label [[CANCEL_CONT]] @@ -12011,23 +12900,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12037,91 +12932,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// 
CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12134,93 +13043,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12234,25 +13147,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12262,118 +13182,132 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], 
[[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] 
+// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12386,93 +13320,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 
[[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12485,23 +13423,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_19:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12511,91 +13455,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12608,93 +13566,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 
[[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 
4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12708,25 +13670,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -12737,100 +13706,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12843,112 +13825,118 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// 
CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX10]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// 
CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12961,23 +13949,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12987,91 +13981,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13084,82 +14092,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -13180,25 +14192,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -13209,100 +14228,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr 
[[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13315,84 +14347,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: 
omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -289,25 +289,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -316,109 +322,115 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// 
CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store double [[TMP21]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store double [[TMP24]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store double [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load volatile double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: store double [[TMP32]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP36]], ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -427,80 +439,92 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP23]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr 
[[TMP28]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -540,6 +564,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -553,20 +578,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -575,103 +605,111 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4 -// CHECK3-NEXT: store double [[TMP20]], ptr [[G1_CASTED]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[G1_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP24]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load volatile double, ptr [[TMP28]], align 4 +// CHECK3-NEXT: store double [[TMP29]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP33]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], i32 noundef [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -679,83 +717,93 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store double [[TMP8]], ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP3]], ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP6]], align 8 +// CHECK3-NEXT: store double [[TMP15]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // 
CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP12]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr 
[[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: ret void // // @@ -920,6 +968,7 @@ // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -929,21 +978,27 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -952,131 +1007,142 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 
+// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK8-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK8-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP21:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: store i32 [[TMP21]], ptr [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR9]], align 4 -// CHECK8-NEXT: store i32 [[TMP24]], ptr [[SVAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP25:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[VEC4]], i64 [[TMP22]], ptr [[S_ARR5]], ptr [[TMP23]], i64 [[TMP25]]) +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK8-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK8-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP29]], align 8 +// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8 +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[TMP32]], align 8 +// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK8-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK8-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK8-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done12: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1084,115 +1150,123 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// 
CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK8-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK8-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP29]], i64 4, i1 false) // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: 
[[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1210,10 +1284,10 @@ // CHECK8-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK8-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1225,9 +1299,9 @@ // CHECK8-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK8-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK8-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1298,12 +1372,12 @@ // CHECK8-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK8: omp_offload.cont: // CHECK8-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP35:%.*]] = 
getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1378,6 +1452,7 @@ // CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1386,20 +1461,25 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1408,121 +1488,133 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: store i32 [[TMP19]], ptr [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], ptr [[VEC4]], i64 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]]) +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK8-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK8-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8 +// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK8-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done11: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1530,114 +1622,120 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK8-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, 
i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// 
CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds 
[[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1657,7 +1755,7 @@ // CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: store i32 0, ptr [[F]], align 4 // CHECK8-NEXT: ret void // @@ -1670,7 +1768,7 @@ // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK8-NEXT: ret void @@ -1846,6 +1944,7 @@ // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1855,21 +1954,27 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1878,129 +1983,140 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// 
CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], 
ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK10-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP19]], ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR9]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[VEC4]], i32 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]], i32 [[TMP23]]) +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK10-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK10-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done12: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// 
CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2008,111 +2124,119 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br 
i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP30]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP29]], i32 4, i1 false) // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: 
-// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2130,10 +2254,10 @@ // CHECK10-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK10-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2145,9 +2269,9 @@ // CHECK10-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK10-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK10-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2218,12 +2342,12 @@ // CHECK10-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK10: omp_offload.cont: // CHECK10-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: 
// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2298,6 +2422,7 @@ // CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2306,20 +2431,25 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2328,119 +2458,131 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, 
ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: 
omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP17]], ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], ptr [[VEC4]], i32 [[TMP18]], ptr [[S_ARR5]], ptr [[TMP19]]) +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK10-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 
dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2448,110 +2590,116 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr 
[[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], 
i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK10-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2571,7 +2719,7 @@ // CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: store i32 0, ptr [[F]], align 4 // CHECK10-NEXT: ret void // @@ -2584,7 +2732,7 @@ // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK10-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp @@ -199,77 +199,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -279,139 +291,155 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -421,43 +449,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -465,14 +497,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -609,77 +641,89 @@ // CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -689,43 +733,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -733,96 +781,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -832,43 +892,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -876,14 +940,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -891,95 +955,108 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // 
CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -989,43 +1066,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -1033,14 +1114,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1175,77 +1256,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l63 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]])
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
+// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]])
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
 // CHECK1-NEXT: ret void
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
@@ -1255,43 +1348,47 @@
 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1299,96 +1396,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l69 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1398,43 +1507,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, 
ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1442,14 +1555,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1457,95 +1570,108 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 
[[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1555,43 +1681,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
 // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
 // CHECK1-NEXT: call void @_Z3fn3v()
@@ -1599,14 +1729,14 @@
 // CHECK1: omp.body.continue:
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1
 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
 // CHECK1: omp.inner.for.end:
 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 // CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
 // CHECK1-NEXT: ret void
 //
 //
diff --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
--- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp
@@ -230,25 +230,31 @@
 // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8
 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8
 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8
 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8
 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -257,100 +263,114 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr 
[[G1_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
+// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[G3]], ptr [[TMP17]], ptr [[SVAR6]], ptr [[SFVAR7]])
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64
+// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
+// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8
+// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP28]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -358,105 +378,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// 
CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -498,6 +522,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -511,20 +536,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -533,98 +563,112 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 
4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] 
], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[G3]], ptr [[TMP15]], ptr [[SVAR6]], ptr [[SFVAR7]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP20]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 4 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP24]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP31]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP35]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -632,103 +676,107 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// 
CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], 
[[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -896,6 +944,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -905,21 +954,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -928,33 +983,38 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// 
CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -964,105 +1024,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP18]], ptr [[SVAR8]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP38]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done11: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP40]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP41]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done13: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1070,39 +1140,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr 
[[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1112,99 +1186,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 
4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1222,10 +1296,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1237,9 +1311,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1310,12 +1384,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], 
i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1390,6 +1464,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1398,20 +1473,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1420,135 +1500,149 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1556,136 +1650,140 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], 
i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1705,7 +1803,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1718,7 +1816,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1895,6 +1993,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1904,21 +2003,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1927,33 +2032,38 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// 
CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1963,103 +2073,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// 
CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP16]], ptr [[SVAR8]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP36]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi 
ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP38]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP39]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2067,37 +2187,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2107,97 +2231,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, 
i1 false) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr 
[[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2215,10 +2339,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2230,9 +2354,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2303,12 +2427,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2383,6 +2507,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2391,20 +2516,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2413,133 +2543,147 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP24]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: br label 
[[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2547,132 +2691,136 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // 
CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 
[[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr 
align 4 [[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2692,7 +2840,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2705,7 +2853,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// 
CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -252,78 +252,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -333,43 +345,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -379,20 +395,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -407,84 +423,97 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store 
i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]])
+// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
@@ -494,43 +523,47 @@
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32
// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
// CHECK1-NEXT: invoke void @_Z3foov()
@@ -540,20 +573,20 @@
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1
// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
// CHECK1-NEXT: ret void
// CHECK1: terminate.lpad:
-// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 }
+// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 }
// CHECK1-NEXT: catch ptr null
-// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0
-// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]]
+// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0
+// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]]
// CHECK1-NEXT: unreachable
//
//
@@ -745,78 +778,90 @@
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52
// CHECK1-SAME: () #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4)
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...)
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -826,43 +871,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -872,98 +921,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: 
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -973,43 +1034,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1019,98 +1084,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: 
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1120,43 +1197,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1166,70 +1247,77 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: 
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -1238,41 +1326,46 @@ // CHECK1-NEXT: [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK1: invoke.cont2: -// CHECK1-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1282,43 +1375,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // 
CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1328,20 +1425,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // @@ -1511,78 +1608,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1592,43 +1701,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1638,20 +1751,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: 
omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK5-NEXT: unreachable // // @@ -1666,84 +1779,97 @@ // CHECK5-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store 
i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK5-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1753,43 +1879,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1799,20 +1929,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // @@ -1995,78 +2125,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2076,43 +2218,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2122,98 +2268,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2223,43 +2381,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2269,98 +2431,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2370,43 +2544,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2416,70 +2594,77 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -2488,41 +2673,46 @@ // CHECK5-NEXT: [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK5: invoke.cont2: -// CHECK5-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK5-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2532,43 +2722,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // 
CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2578,20 +2772,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // @@ -2770,78 +2964,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2851,43 +3057,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -2897,20 +3107,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: 
omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK9-NEXT: unreachable // // @@ -2925,84 +3135,97 @@ // CHECK9-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store 
i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK9-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3012,43 +3235,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3058,20 +3285,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // @@ -3263,78 +3490,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3344,43 +3583,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3390,98 +3633,110 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: 
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3491,43 +3746,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3537,98 +3796,110 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: 
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3638,43 +3909,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3684,70 +3959,77 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: 
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -3756,41 +4038,46 @@ // CHECK9-NEXT: [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK9: invoke.cont2: -// CHECK9-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK9-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3800,43 +4087,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // 
CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3846,20 +4137,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // @@ -4029,78 +4320,90 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK13-SAME: () #[[ATTR3:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4110,43 +4413,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] 
to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4156,20 +4463,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 
[[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK13-NEXT: unreachable // // @@ -4184,84 +4491,97 @@ // CHECK13-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 
+// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK13-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK13-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 
// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4271,43 +4591,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 
// CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4317,20 +4641,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // @@ -4513,78 +4837,90 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 
4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4594,43 +4930,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] 
to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4640,98 +4980,110 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4741,43 +5093,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4787,98 +5143,110 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 
4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4888,43 +5256,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] 
to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4934,70 +5306,77 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -5006,41 +5385,46 @@ // CHECK13-NEXT: [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK13-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK13: invoke.cont2: -// CHECK13-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK13-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK13-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK13-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5050,43 +5434,47 @@ // 
CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: 
store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -5096,20 +5484,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // diff --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp @@ -151,15 +151,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -167,69 +169,79 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], 
[ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -243,77 +255,81 @@ // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 
+// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// 
CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -340,15 +356,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -356,67 +374,77 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], 
[ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -430,75 +458,79 @@ // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], 
align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4 // CHECK3-NEXT: store i32 3, ptr [[SVAR]], 
align 4 // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -610,15 +642,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -626,6 +660,8 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -633,8 +669,11 @@ // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 @@ -652,55 +691,61 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// 
CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -710,12 +755,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -732,15 +776,19 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -757,65 +805,65 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK9-NEXT: call void 
@_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] @@ -838,10 +886,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -849,9 +897,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -889,12 +937,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: 
arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -964,15 +1012,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -980,81 +1030,92 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: 
[[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ 
[[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1063,12 +1124,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1078,97 +1138,101 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1192,7 +1256,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1205,7 +1269,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1328,15 +1392,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1344,6 +1410,8 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1351,8 +1419,11 @@ // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 @@ -1370,53 +1441,59 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi 
i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -1426,12 +1503,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1448,15 +1524,19 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1471,63 +1551,63 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 
[[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1550,10 +1630,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1561,9 +1641,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1601,12 +1681,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], 
ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1676,15 +1756,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1692,79 +1774,90 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr 
[[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: 
arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1773,12 +1866,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1788,93 +1880,97 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1898,7 +1994,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1911,7 +2007,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp @@ -135,78 +135,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 
[[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -216,135 +228,151 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // 
CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -354,57 +382,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -452,78 +484,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -533,57 +577,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -57,358 +57,378 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARGC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP6]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, i64 [[TMP7]], i64 [[TMP8]], ptr [[ARGC_ADDR]], ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP16]] +// 
CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// 
CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]] -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9 -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = add nuw i64 [[TMP42]], 1 -// CHECK1-NEXT: [[TMP44:%.*]] = mul nuw 
i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP44]], ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..3, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP51]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP54]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP55]], 9 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP37]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP39]], i64 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP42]] +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP43]], i64 9 +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP44]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = add nuw i64 [[TMP49]], 1 +// CHECK1-NEXT: [[TMP51:%.*]] = mul nuw i64 [[TMP50]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..2, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..3, ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP58]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP59]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP61]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP62]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label 
[[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP56]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP63]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP57]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP58]], [[TMP59]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP60]], 1 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP67]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB2]], i32 [[TMP66]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP68]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[TMP69]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP70]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP67]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB2]], i32 [[TMP75]], ptr [[TMP67]]) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: [[TMP74:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB2]], i32 [[TMP73]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP77]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB2]], i32 [[TMP82]], ptr [[TMP74]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP77:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP77]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP79]]) -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP81]], i32 1) -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP85]], ptr [[TMP84]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP89]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP20]] to ptr +// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 +// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP89]], [[TMP90]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP93]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ 
[[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP94]] monotonic, align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP97]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP101]] monotonic, align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], 
[[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP104]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP98:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP103:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP98]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP102:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP98]], i8 [[TMP101]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP103]] = extractvalue { i8, i1 } [[TMP102]], 0 -// CHECK1-NEXT: [[TMP104:%.*]] = extractvalue { i8, i1 } [[TMP102]], 1 -// CHECK1-NEXT: br i1 [[TMP104]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 +// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 +// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP105]]) +// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP112]]) // CHECK1-NEXT: ret void // // @@ -547,20 +567,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -574,7 +594,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -801,23 +801,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -827,83 +833,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -916,16 +941,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -936,116 +956,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], 
align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1064,23 +1088,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1090,83 +1120,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1179,16 +1228,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1199,116 +1243,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, 
!llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1328,25 +1376,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1356,110 +1411,129 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] 
] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -1472,16 +1546,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1492,116 +1561,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// 
CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// 
CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 
[[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1620,23 +1693,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1646,83 +1725,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1735,16 +1833,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1755,116 +1848,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, 
!llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1884,25 +1981,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1913,91 +2017,109 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP37]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -2010,17 +2132,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2031,137 +2148,143 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: 
[[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, 
ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK1-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP43]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 8, !llvm.access.group [[ACC_GRP40]] // CHECK1-NEXT: 
call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP40]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]]) +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP55]], 0 // CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] @@ -2180,23 +2303,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2206,83 +2335,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP43]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2295,16 +2443,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2315,99 +2458,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: 
[[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK1-NEXT: store double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP46]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2415,12 +2562,12 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK1-NEXT: 
[[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP45]], 0 // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -2440,25 +2587,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2469,91 +2623,109 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -2566,17 +2738,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2587,101 +2754,107 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 
8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP42]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2689,12 +2862,12 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label 
[[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -2747,23 +2920,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2773,81 +2952,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2860,16 +3058,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2880,111 +3073,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], 
ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3003,23 +3200,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3029,81 +3232,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt 
i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3116,16 +3338,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3136,111 +3353,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 
0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3260,25 +3481,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3288,108 +3516,127 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -3402,16 +3649,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, 
align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3422,111 +3664,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] 
= icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr 
[[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: 
[[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3545,23 +3791,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3571,81 +3823,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// 
CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3658,16 +3929,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3678,111 +3944,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 
0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3802,25 +4072,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3831,89 +4108,107 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -3926,17 +4221,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3947,130 +4237,136 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4, 
!llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP43]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP41]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK3-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK3-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK3-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]]) +// CHECK3-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK3-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK3-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP55]], 0 // CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK3-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -4089,23 +4385,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4115,81 +4417,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -4202,16 +4523,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4222,94 +4538,98 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] // CHECK3-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 4, 
!llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4317,12 +4637,12 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK3-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP45]], 0 // CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -4342,25 +4662,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] 
= alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -4371,89 +4698,107 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], 
align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group 
[[ACC_GRP50]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -4466,17 +4811,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4487,96 +4827,102 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_19:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = 
icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK3-NEXT: [[TMP38:%.*]] = load 
double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP42]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP53]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4584,12 +4930,12 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
// CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -5296,23 +5642,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5322,83 +5674,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5411,16 +5782,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5433,104 +5799,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 
[[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr 
inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -5549,23 +5919,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5575,83 +5951,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5664,16 +6059,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5686,104 +6076,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] 
+// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr 
[[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -5803,25 +6197,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5831,110 +6232,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] 
] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK9-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -5947,16 +6367,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5969,104 +6384,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] 
= trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// 
CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -6085,23 +6504,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6111,83 +6536,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6200,16 +6644,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6222,104 +6661,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] 
+// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr 
[[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -6339,25 +6782,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6368,91 +6818,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6465,17 +6933,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6488,125 +6951,131 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: 
store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 
4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store double [[ADD13]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 // CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] @@ -6625,23 +7094,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6651,83 +7126,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6740,16 +7234,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6762,87 +7251,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store 
double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -6850,12 +7343,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -6875,25 +7368,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6904,91 +7404,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// 
CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -7001,17 +7519,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7024,89 +7537,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // 
CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// 
CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = 
sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -7114,12 +7633,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK9-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -7762,23 +8281,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7788,83 +8313,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP62]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7877,16 +8421,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7899,104 +8438,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], 
[[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 
[[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8015,23 +8558,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8041,83 +8590,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP68]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8130,16 +8698,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8152,104 +8715,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], 
[[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 
[[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8269,25 +8836,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8297,110 +8871,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: 
[[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], 
[[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK9-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -8413,16 +9006,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8435,104 +9023,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP77]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8551,23 +9143,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8577,83 +9175,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP80]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP81:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8666,16 +9283,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8688,104 +9300,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], 
[[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 
[[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP83]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP84:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8805,25 +9421,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8834,91 +9457,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP86]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP87:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -8931,17 +9572,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8954,125 +9590,131 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // 
CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89:![0-9]+]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4, 
!llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX15]], align 4, !llvm.access.group [[ACC_GRP89]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 // CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] @@ -9091,23 +9733,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9117,83 +9765,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP92]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP93:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -9206,16 +9873,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9228,87 +9890,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX10]], 
align 4, !llvm.access.group [[ACC_GRP95]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP96:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -9316,12 +9982,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -9341,25 +10007,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -9370,91 +10043,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// 
CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP98]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -9467,17 +10158,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9490,89 +10176,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // 
CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// 
CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext 
i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP101]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -9580,12 +10272,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK9-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -10238,23 +10930,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10264,81 +10962,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10351,16 +11068,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] 
= alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10373,99 +11085,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: 
omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// 
CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: 
-// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10484,23 +11200,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10510,81 +11232,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP27]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10597,16 +11338,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] 
= alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10619,99 +11355,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: 
omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// 
CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10731,25 +11471,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10759,108 +11506,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], 
[[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -10873,16 +11639,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = 
alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10895,99 +11656,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], 
ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -11006,23 +11771,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11032,81 +11803,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11119,16 +11909,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11141,99 +11926,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: 
omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// 
CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -11253,25 +12042,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -11282,89 +12078,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 
4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP45]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11377,17 +12191,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11400,118 +12209,124 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 
1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 
4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr 
[[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK11-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -11530,23 +12345,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11556,81 +12377,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP51]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11643,16 +12483,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11665,82 +12500,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// 
CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11748,12 +12587,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -11773,25 +12612,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -11802,89 +12648,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11897,17 +12761,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11920,84 +12779,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 
[[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: 
omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ADD7:%.*]] 
= fadd double [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP60]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -12005,12 +12870,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12653,23 +13518,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12679,81 +13550,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// 
CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP63]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -12766,16 +13656,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12788,99 +13673,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP66]] -// 
CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP66]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP67:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12899,23 +13788,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12925,81 +13820,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP69]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13012,16 +13926,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13034,99 +13943,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP72]] -// 
CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP72]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -13146,25 +14059,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13174,108 +14094,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], 
[[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], 
[[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP76:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -13288,16 +14227,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = 
alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13310,99 +14244,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 
[[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78:![0-9]+]] +// CHECK11-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP78]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP79:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -13421,23 +14359,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13447,81 +14391,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP81]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13534,16 +14497,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13556,99 +14514,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP84]] -// 
CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP84]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -13668,25 +14630,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -13697,89 +14666,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP87]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -13792,17 +14779,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13815,118 +14797,124 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 
[[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90:![0-9]+]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4, 
!llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP90]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK11-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -13945,23 +14933,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13971,81 +14965,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP93]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14058,16 +15071,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14080,82 +15088,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// 
CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP96]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP96]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -14163,12 +15175,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -14188,25 +15200,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -14217,89 +15236,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP99]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -14312,17 +15349,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca 
ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14335,84 +15367,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw 
i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // 
CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: 
[[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP102]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -14420,12 +15458,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -288,25 +288,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -315,98 +321,105 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr 
[[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP21]], ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP24]], ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr 
[[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store float [[TMP28]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP32:%.*]] = load volatile double, ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP32]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float [[TMP36]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -415,16 +428,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -433,83 +445,95 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] 
= alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP14:%.*]] 
= getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -553,6 +577,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: 
[[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -566,20 +591,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -588,92 +618,102 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 
+// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store 
double [[TMP20]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float [[TMP24]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP29:%.*]] = load volatile double, ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double [[TMP29]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -682,16 +722,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], i32 noundef [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -699,86 +737,96 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // 
CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store double [[TMP8]], ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP3]], ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP6]], align 8 +// CHECK3-NEXT: store double [[TMP15]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr 
[[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: 
[[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -983,6 +1031,7 @@ // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -992,21 +1041,27 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1015,138 +1070,149 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 
+// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK8-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK8-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: store i32 [[TMP21]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP22:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR9]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: store i32 [[TMP24]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP25:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[VEC4]], i64 [[TMP22]], ptr [[S_ARR5]], ptr [[TMP23]], i64 [[TMP25]]), !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK8-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK8-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: store ptr 
[[S_ARR]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK8-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK8-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK8-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK8-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: 
-// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done12: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1154,122 +1220,130 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK8-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK8-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] 
+// CHECK8-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP29]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK8-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK8-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK8-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], 
ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1287,10 +1361,10 @@ // CHECK8-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK8-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1302,9 +1376,9 @@ // CHECK8-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK8-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK8-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1375,12 +1449,12 @@ // CHECK8-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK8: omp_offload.cont: // CHECK8-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // 
CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1455,6 +1529,7 @@ // CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1463,20 +1538,25 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1485,128 +1565,140 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: store i32 [[TMP19]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP20:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], ptr [[VEC4]], i64 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]]), !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK8-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK8-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK8-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK8-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK8-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] 
= icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done11: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1614,121 +1706,127 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK8-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK8-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK8-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1748,7 +1846,7 @@ // CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: store i32 0, ptr [[F]], align 4 // CHECK8-NEXT: ret void // @@ -1761,7 +1859,7 @@ // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK8-NEXT: ret void @@ -1937,6 +2035,7 @@ // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1946,21 +2045,27 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1969,136 +2074,147 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// 
CHECK10-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // 
CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK10-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: 
[[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: store i32 [[TMP19]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR9]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[VEC4]], i32 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]], i32 [[TMP23]]), !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: 
[[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK10-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK10-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK10-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done12: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2106,118 +2222,126 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP30:%.*]] = 
load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP30]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP29]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK10-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK10-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2235,10 +2359,10 @@ // CHECK10-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK10-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2250,9 +2374,9 @@ // CHECK10-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK10-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK10-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2323,12 +2447,12 @@ // CHECK10-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK10: omp_offload.cont: // CHECK10-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2403,6 +2527,7 @@ // CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2411,20 +2536,25 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2433,126 +2563,138 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, 
ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle 
i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: store i32 [[TMP17]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], ptr [[VEC4]], i32 [[TMP18]], ptr [[S_ARR5]], ptr [[TMP19]]), !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK10-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK10-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] 
-// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2560,117 +2702,123 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], 
align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], 
[[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK10-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK10-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK10-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2690,7 +2838,7 @@ // CHECK10-NEXT: 
[[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: store i32 0, ptr [[F]], align 4 // CHECK10-NEXT: ret void // @@ -2703,7 +2851,7 @@ // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK10-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -196,70 +196,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -268,12 +281,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -283,60 +295,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 
1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -347,75 +363,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -424,12 +453,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -439,43 +467,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, 
ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -483,17 +515,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -634,70 +666,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -706,12 +751,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -721,43 +765,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 
1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -765,17 +813,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -786,75 +834,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -863,12 +924,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -878,43 +938,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, 
ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -922,17 +986,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -944,88 +1008,102 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1034,12 +1112,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1049,43 +1126,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -1093,17 +1174,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1242,70 +1323,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1314,12 +1408,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1329,43 +1422,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -1373,17 +1470,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1394,75 +1491,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1471,12 +1581,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1486,43 +1595,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store 
i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -1530,17 +1643,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1552,88 +1665,102 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1642,12 +1769,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1657,43 +1783,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -1701,17 +1831,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1805,70 +1935,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1877,12 +2020,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1892,60 +2034,64 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1956,75 +2102,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2033,12 +2192,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2048,43 +2206,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 
99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -2092,17 +2254,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2243,70 +2405,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2315,12 +2490,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2330,43 +2504,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -2374,17 +2552,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2395,75 +2573,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2472,12 +2663,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2487,43 +2677,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2531,17 +2725,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// 
CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2553,17 +2747,20 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2571,78 +2768,88 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp 
sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 // CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP20]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then6: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP21]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2650,39 +2857,45 @@ // CHECK3: omp_if.else7: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK3: omp.inner.for.cond8: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] // CHECK3: omp.inner.for.body10: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[DOTCAPTURE_EXPR__CASTED12]], align 1 -// CHECK3-NEXT: [[TMP27:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR__CASTED12]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP28]] to i1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK3-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +// CHECK3-NEXT: store i64 [[TMP29]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP32]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP34]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK3: omp_if.else16: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP35]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] // CHECK3: omp_if.end18: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK3: omp.inner.for.inc19: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end21: @@ -2690,10 +2903,10 @@ // CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2702,13 +2915,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2718,48 +2930,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -2767,60 +2987,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: 
omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2829,13 +3049,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2845,48 +3064,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 
[[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -2894,60 +3121,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] 
= phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3086,70 +3313,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3158,12 +3398,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3173,43 +3412,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -3217,17 +3460,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3238,75 +3481,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3315,12 +3571,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3330,43 +3585,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 
1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3374,17 +3633,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3396,88 +3655,102 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[ARG_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group 
[[ACC_GRP55]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3486,12 +3759,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3501,43 +3773,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -3545,17 +3821,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4225,70 +4501,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK9-SAME: () #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4297,12 +4586,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4312,60 +4600,64 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4376,75 +4668,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// 
CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) 
#[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4453,12 +4758,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4468,43 +4772,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -4512,17 +4820,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4663,70 +4971,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4735,12 +5056,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4750,43 +5070,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -4794,17 +5118,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4815,75 +5139,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4892,12 +5229,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4907,43 +5243,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 
99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -4951,17 +5291,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4973,88 +5313,102 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5063,12 +5417,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5078,43 +5431,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -5122,17 +5479,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5271,70 +5628,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5343,12 +5713,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5358,43 +5727,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -5402,17 +5775,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5423,75 +5796,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5500,12 +5886,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5515,43 +5900,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store 
i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -5559,17 +5948,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5581,88 +5970,102 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[ARG_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5671,12 +6074,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5686,43 +6088,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -5730,17 +6136,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5834,70 +6240,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK11-SAME: () #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5906,12 +6325,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5921,60 +6339,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5985,75 +6407,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 
[[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6062,12 +6497,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6077,43 +6511,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -6121,17 +6559,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6272,70 +6710,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 
4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6344,12 +6795,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6359,43 +6809,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -6403,17 +6857,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6424,75 +6878,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // 
CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6501,12 +6968,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6516,43 +6982,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 
0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -6560,17 +7030,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6582,17 +7052,20 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6600,78 +7073,88 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// 
CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp 
sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 // CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP20]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then6: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP21]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -6679,39 +7162,45 @@ // CHECK11: omp_if.else7: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK11: omp.inner.for.cond8: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] // CHECK11: omp.inner.for.body10: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[DOTCAPTURE_EXPR__CASTED12]], align 1 -// 
CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED12]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP28]] to i1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK11-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +// CHECK11-NEXT: store i64 [[TMP29]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP30]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP32]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP34]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK11: omp_if.else16: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP35]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] // CHECK11: omp_if.end18: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK11: omp.inner.for.inc19: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end21: @@ -6719,10 +7208,10 @@ // CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6731,13 +7220,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6747,48 +7235,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -6796,60 +7292,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// 
CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // 
CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6858,13 +7354,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6874,48 +7369,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: 
cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -6923,60 +7426,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// 
CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop 
[[LOOP46:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7115,70 +7618,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7187,12 +7703,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7202,43 +7717,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -7246,17 +7765,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7267,75 +7786,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK11: omp.inner.for.end: // 
CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7344,12 +7876,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7359,43 +7890,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -7403,17 +7938,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7425,88 +7960,102 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[ARG_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK11-NEXT: store i64 
[[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7515,12 +8064,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7530,43 +8078,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 
4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -7574,17 +8126,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw 
i32 [[TMP10]], 1
+// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]]
+// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1
// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]]
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]]
// CHECK11: omp.inner.for.end:
// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK11: omp.loop.exit:
-// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]])
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
-// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
+// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK11: .omp.final.then:
// CHECK11-NEXT: store i32 100, ptr [[I]], align 4
// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]]
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
--- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp
@@ -233,25 +233,31 @@
// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8
// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8
// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -260,107 +266,121 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// 
CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[G3]], ptr [[TMP17]], ptr [[SVAR6]], ptr [[SFVAR7]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br 
label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -368,112 +388,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group 
[[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 8 -// CHECK1-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 
8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -515,6 +539,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -528,20 +553,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -550,105 +580,119 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// 
CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[G3]], ptr [[TMP15]], ptr [[SVAR6]], ptr [[SFVAR7]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br 
label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -656,110 +700,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], 
i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 4 -// CHECK3-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 
[[TMP29]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -963,6 +1011,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -972,21 +1021,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -995,33 +1050,38 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1031,112 +1091,122 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP18]], ptr [[SVAR8]]), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done11: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP29]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP42]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP43]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done13: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1144,39 +1214,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1186,106 +1260,106 @@ // 
CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 
0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1303,10 +1377,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = 
alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1318,9 +1392,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1391,12 +1465,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1471,6 +1545,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1479,20 +1554,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1501,142 +1581,156 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr 
[[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP17]]), !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: 
[[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP28]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP39]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] 
-// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1644,143 +1738,147 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds 
[[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1800,7 +1898,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] 
= load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1813,7 +1911,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1990,6 +2088,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1999,21 +2098,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2022,33 +2127,38 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 
4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2058,110 +2168,120 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label 
[[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP16]], ptr [[SVAR8]]), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK11-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 
false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP40]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP41]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2169,37 +2289,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: 
arrayctor.loop: @@ -2209,104 +2333,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2324,10 +2448,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], 
align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2339,9 +2463,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2412,12 +2536,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2492,6 +2616,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2500,20 +2625,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2522,140 +2652,154 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP15]]), !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 
8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], 
[[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2663,139 +2807,143 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2815,7 +2963,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], 
align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2828,7 +2976,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -252,71 +252,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -325,12 +338,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr 
@__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -340,43 +352,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -386,27 +402,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null 
-// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: unreachable // // @@ -421,77 +437,91 @@ // CHECK1-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -500,12 +530,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -515,43 +544,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -561,27 +594,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void 
@__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: unreachable // // @@ -773,71 +806,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -846,12 +892,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -861,43 +906,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -907,98 +956,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1007,12 +1069,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1022,43 +1083,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1068,98 +1133,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1168,12 +1246,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1183,43 +1260,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1229,77 +1310,84 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -1308,48 +1396,53 @@ // CHECK1-NEXT: [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK1: invoke.cont2: -// CHECK1-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1359,43 +1452,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP45:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1405,27 +1502,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: unreachable // // @@ -1934,71 +2031,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) 
+// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2007,12 +2117,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2022,43 +2131,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: 
omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2068,27 +2181,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: unreachable // // @@ -2103,77 +2216,91 @@ // CHECK5-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// 
CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2182,12 +2309,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2197,43 +2323,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// 
CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2243,27 +2373,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: unreachable // // @@ -2446,71 +2576,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2519,12 +2662,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2534,43 +2676,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// 
CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2580,98 +2726,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2680,12 +2839,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2695,43 +2853,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// 
CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2741,98 +2903,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2841,12 +3016,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2856,43 +3030,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// 
CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2902,77 +3080,84 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -2981,48 +3166,53 @@ // CHECK5-NEXT: [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK5: invoke.cont2: -// CHECK5-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK5-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3032,43 +3222,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP45:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -3078,27 +3272,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: unreachable // // @@ -3277,71 +3471,84 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) 
+// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3350,12 +3557,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3365,43 +3571,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -3411,27 +3621,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: unreachable // // @@ -3446,77 +3656,91 @@ // CHECK9-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// 
CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3525,12 +3749,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3540,43 +3763,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -3586,27 +3813,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: unreachable // // @@ -3798,71 +4025,84 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3871,12 +4111,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3886,43 +4125,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -3932,98 +4175,111 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4032,12 +4288,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4047,43 +4302,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -4093,98 +4352,111 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4193,12 +4465,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4208,43 +4479,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -4254,77 +4529,84 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -4333,48 +4615,53 @@ // CHECK9-NEXT: [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK9: invoke.cont2: -// CHECK9-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4384,43 +4671,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -4430,27 +4721,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: unreachable // // @@ -4959,71 +5250,84 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK13-SAME: () #[[ATTR3:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) 
+// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], 
ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5032,12 +5336,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5047,43 +5350,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5093,27 +5400,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: unreachable // // @@ -5128,77 +5435,91 @@ // CHECK13-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label 
[[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5207,12 +5528,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5222,43 +5542,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5268,27 +5592,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: unreachable // // @@ -5471,71 +5795,84 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5544,12 +5881,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5559,43 +5895,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5605,98 +5945,111 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5705,12 +6058,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5720,43 +6072,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5766,98 +6122,111 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5866,12 +6235,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5881,43 +6249,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5927,77 +6299,84 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -6006,48 +6385,53 @@ // CHECK13-NEXT: [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK13-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK13: invoke.cont2: -// CHECK13-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK13-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK13-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK13-NEXT: 
call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6057,43 +6441,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -6103,27 +6491,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK13-NEXT: unreachable // // diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp @@ -151,15 +151,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -167,62 +169,73 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], 
[ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -231,12 +244,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -250,80 +262,84 @@ // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -354,15 +370,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -370,60 +388,71 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], 
[ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -432,12 +461,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -451,78 +479,82 @@ // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -664,15 +696,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -680,6 +714,8 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -687,8 +723,11 @@ // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 @@ -706,62 +745,68 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// 
CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -771,12 +816,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -793,15 +837,19 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -818,72 +866,72 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br 
i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr 
inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] @@ -906,10 +954,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// 
CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -917,9 +965,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -957,12 +1005,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1032,15 +1080,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1048,88 +1098,99 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void 
@_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// 
CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1138,12 +1199,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1153,104 +1213,108 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br 
label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x 
%struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1274,7 +1338,7 @@ 
// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1287,7 +1351,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1410,15 +1474,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1426,6 +1492,8 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1433,8 +1501,11 @@ // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr 
[[DOTOMP_COMB_UB]], align 4 @@ -1452,60 +1523,66 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// 
CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -1515,12 +1592,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1537,15 +1613,19 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK11-NEXT: 
[[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1560,70 +1640,70 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, 
!llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void 
@_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1646,10 +1726,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1657,9 +1737,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1697,12 +1777,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1772,15 +1852,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1788,86 +1870,97 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK11-NEXT: 
br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1876,12 +1969,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1891,100 +1983,104 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 
// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp 
ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2008,7 +2104,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2021,7 +2117,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp +++ 
b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -135,71 +135,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -208,12 +221,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -223,60 +235,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -287,71 +303,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -360,12 +389,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -375,60 +403,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -480,71 +512,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -553,12 +598,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -568,60 +612,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/distribute_private_codegen.cpp b/clang/test/OpenMP/distribute_private_codegen.cpp --- a/clang/test/OpenMP/distribute_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_private_codegen.cpp @@ -130,15 +130,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -155,66 +157,68 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -241,15 +245,17 @@ // CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -266,66 +272,68 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -473,15 +481,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -498,6 +508,8 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -515,65 +527,65 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// 
CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 
[[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -595,15 +607,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -614,49 +628,51 @@ // CHECK9-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // @@ -664,10 +680,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -675,9 +691,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -715,12 +731,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -790,15 +806,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -808,89 +826,91 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr 
[[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] @@ -914,7 +934,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -927,7 +947,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1086,15 +1106,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1111,6 +1133,8 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1128,63 +1152,63 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1206,15 +1230,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1225,49 +1251,51 @@ // CHECK11-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: ret void // // @@ -1275,10 +1303,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1286,9 +1314,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1326,12 +1354,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1401,15 +1429,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1419,87 +1449,89 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1523,7 +1555,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: 
store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1536,7 +1568,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -233,23 +233,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -259,84 +265,86 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 16) ] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr 
[[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -433,23 +441,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -459,82 +473,84 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = 
load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 32, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -631,23 +647,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // 
CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -657,99 +679,101 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// 
CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP17]] +// 
CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 -2147483522, ptr [[I]], align 4 // 
CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -846,119 +870,125 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK1-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK1-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK1-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] 
= load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK1-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK1-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK1-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK1-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK1-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK1-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK1-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK1-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -1035,17 +1065,20 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1055,73 +1088,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1225,23 +1260,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1251,80 +1292,82 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 16) ] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// 
CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1421,23 +1464,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1447,78 +1496,80 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 
[[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 32, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1615,23 +1666,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1641,95 +1698,97 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// 
CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, 
!llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] // CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1826,119 +1885,125 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK3-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK3-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK3-NEXT: 
[[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK3-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK3-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK3-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK3-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK3-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK3-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK3-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK3-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK3-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK3-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK3-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK3-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK3-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK3-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -2015,17 +2080,20 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2035,73 +2103,75 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2205,23 +2275,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2231,84 +2307,86 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 16) ] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr 
[[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM8]] // CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2405,23 +2483,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2431,82 +2515,84 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK5-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK5-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !nontemporal !15 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] 
-// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !nontemporal !15 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !nontemporal !15 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !nontemporal !15 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK5-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 32, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2603,23 +2689,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2629,99 +2721,101 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK5-NEXT: 
[[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK5-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
// CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2818,150 +2912,156 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK5-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK5-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK5-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK5-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK5-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !15, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK5-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK5-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !15, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK5: omp.inner.for.cond13: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK5: omp.inner.for.body15: -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK5: omp.inner.for.cond12: // CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK5-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK5: omp.body.continue20: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK5: omp.inner.for.inc21: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP24:![0-9]+]] -// CHECK5: omp.inner.for.end23: +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK5: omp.inner.for.body14: +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK5-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK5-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK5: omp.body.continue19: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK5: omp.inner.for.inc20: +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK5-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK5: omp.inner.for.end22: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK5-NEXT: br i1 [[TMP28]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK5-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK5-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK5-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK5-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK5-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK5-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK5-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK5-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK5-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK5-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK5-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK5-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK5-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK5-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK5-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK5-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK5-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK5-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK5-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] @@ -3038,17 +3138,20 @@ // CHECK5-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3058,73 +3161,75 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3228,23 +3333,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3254,80 +3365,82 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 16) ] +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK7-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// 
CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK7-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK7-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK7-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK7-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] // CHECK7-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3424,23 +3537,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3450,78 +3569,80 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK7-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK7-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4, !nontemporal !16 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK7-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK7-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4, !nontemporal !16 -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr 
inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4, !nontemporal !16 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK7-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK7-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK7-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK7-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4, !nontemporal !16 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK7-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK7-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 32, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3618,23 +3739,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3644,95 +3771,97 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// 
CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK7-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK7-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK7-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK7-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK7-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] // CHECK7-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK7-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3829,150 +3958,156 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK7-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK7-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK7-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK7-NEXT: 
[[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK7-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK7-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK7-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK7-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK7: omp.precond.then: // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 
[[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] -// CHECK7-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK7-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK7-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !16, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK7-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK7-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !16, !llvm.access.group [[ACC_GRP22]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK7-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK7: omp.inner.for.cond13: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK7-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK7: omp.inner.for.body15: -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK7: omp.inner.for.cond12: // CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK7-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK7-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK7: omp.body.continue20: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK7: omp.inner.for.inc21: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] -// CHECK7: omp.inner.for.end23: +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK7: omp.inner.for.body14: +// CHECK7-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK7-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK7-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK7-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK7: omp.body.continue19: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK7: omp.inner.for.inc20: +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK7-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK7: omp.inner.for.end22: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // 
CHECK7: .omp.final.then: -// CHECK7-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK7-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK7-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK7-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK7-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK7-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK7-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK7-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK7-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK7-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK7-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK7-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK7-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK7-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK7-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK7-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK7-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK7-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK7-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK7-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: br label [[OMP_PRECOND_END]] @@ -4049,17 +4184,20 @@ // CHECK7-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4069,73 +4207,75 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK7-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 100, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5495,23 +5635,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5521,84 +5667,86 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 16) ] +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, 
ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK17-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 
// CHECK17-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5613,23 +5761,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5639,82 +5793,84 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], 
align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK17-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 32, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5729,23 +5885,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5755,99 +5917,101 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK17-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP10]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK17-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5860,119 +6024,125 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK17-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] 
to i32 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK17-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK17-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK17-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK17-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 
[[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK17-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK17-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK17-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK17-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK17-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK17-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK17-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK17-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK17-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK17-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK17-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK17-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK17-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK17-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK17-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK17-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK17-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK17-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK17-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK17-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -5984,17 +6154,20 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6004,73 +6177,75 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 100, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6085,23 +6260,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6111,80 +6292,82 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 16) ] +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK19-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, 
!llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] // CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp 
ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6199,23 +6382,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6225,78 +6414,80 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 
-// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK19-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 32, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6311,23 +6502,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6337,95 +6534,97 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: 
[[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK19-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: 
[[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK19-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] // CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK19-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6438,119 +6637,125 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK19-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK19-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK19-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK19-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK19-NEXT: [[SUB:%.*]] = 
sub i32 10, [[CONV]] // CHECK19-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK19-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK19-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK19-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK19-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK19-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK19-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK19-NEXT: br i1 [[TMP22]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK19-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK19-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK19-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK19-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK19-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK19-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK19-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK19-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK19-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK19-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK19-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK19-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK19-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK19-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK19-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: // CHECK19-NEXT: br label [[OMP_PRECOND_END]] @@ -6562,17 +6767,20 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6582,73 +6790,75 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 100, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6663,23 +6873,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6689,84 +6905,86 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 16) ] +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, 
ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK21-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK21-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] +// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK21-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK21-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK21-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK21-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM8]] // CHECK21-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 
// CHECK21-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6781,23 +6999,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6807,82 +7031,84 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 4571423 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK21-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !nontemporal !16 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK21-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr 
[[ARRAYIDX6]], align 4 -// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK21-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !nontemporal !16 -// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !nontemporal !16 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK21-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK21-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK21-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK21-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !nontemporal !16 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK21-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK21-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK21-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK21-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 32, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6897,23 +7123,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] 
= alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6923,99 +7155,101 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK21-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK21-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK21-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK21-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP19]] +// 
CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK21-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK21-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK21-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK21-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK21-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK21-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK21-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK21-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK21-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK21-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7028,150 +7262,156 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK21-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK21-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK21-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: 
[[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK21-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK21-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK21-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK21-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK21-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK21-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK21-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK21-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK21: omp.precond.then: // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK21-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK21-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK21-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK21-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK21-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] -// CHECK21-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK21-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK21-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !16, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK21-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK21-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK21-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !16, !llvm.access.group 
[[ACC_GRP22]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK21-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK21: omp.inner.for.cond13: -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK21-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK21: omp.inner.for.body15: -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK21: omp.inner.for.cond12: // CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK21-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK21: omp.body.continue20: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK21: omp.inner.for.inc21: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] -// CHECK21: omp.inner.for.end23: +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK21-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK21: omp.inner.for.body14: +// CHECK21-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK21-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK21-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK21-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK21: omp.body.continue19: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK21: omp.inner.for.inc20: +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK21-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop 
[[LOOP25:![0-9]+]] +// CHECK21: omp.inner.for.end22: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: -// CHECK21-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK21-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK21-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK21-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK21-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK21-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK21-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK21-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK21-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK21-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK21-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK21-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK21-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK21-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK21-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK21-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK21-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK21-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK21-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK21-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: // CHECK21-NEXT: br label [[OMP_PRECOND_END]] @@ -7183,17 +7423,20 @@ // CHECK21-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7203,73 +7446,75 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK21-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK21-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK21-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK21-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 100, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7284,23 +7529,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7310,80 +7561,82 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 16) ] +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK23-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK23-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, 
!llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK23-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK23-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK23-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK23-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] // CHECK23-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK23-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = icmp 
ne i32 [[TMP25]], 0 -// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7398,23 +7651,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7424,78 +7683,80 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 4571423 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK23-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4, !nontemporal !17 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK23-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK23-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4, !nontemporal !17 -// CHECK23-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4, !nontemporal !17 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK23-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK23-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK23-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK23-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK23-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4, !nontemporal !17 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK23-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK23-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK23-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK23-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 32, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7510,23 +7771,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7536,95 +7803,97 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: 
[[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK23-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK23-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK23-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK23-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK23-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK23-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK23-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: 
[[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK23-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] // CHECK23-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 // CHECK23-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK23-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK23-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK23-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7637,150 +7906,156 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK23-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK23-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK23-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK23-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK23-NEXT: [[SUB:%.*]] = 
sub i32 10, [[CONV]] // CHECK23-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK23-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK23-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK23-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK23-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK23-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK23: omp.precond.then: // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK23-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK23-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK23-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK23-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK23-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP23]] -// CHECK23-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK23-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK23-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !17, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK23-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK23-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK23-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !17, !llvm.access.group [[ACC_GRP23]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK23-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label 
[[OMP_INNER_FOR_COND13:%.*]] -// CHECK23: omp.inner.for.cond13: -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK23-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK23: omp.inner.for.body15: -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK23: omp.inner.for.cond12: // CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK23-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK23-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK23: omp.body.continue20: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK23: omp.inner.for.inc21: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] -// CHECK23: omp.inner.for.end23: +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK23-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK23: omp.inner.for.body14: +// CHECK23-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK23-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK23-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK23-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK23: omp.body.continue19: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK23: omp.inner.for.inc20: +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK23-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK23: omp.inner.for.end22: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK23-NEXT: [[TMP30:%.*]] = load 
i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: -// CHECK23-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK23-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK23-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK23-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK23-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK23-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK23-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK23-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK23-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK23-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK23-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK23-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK23-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK23-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK23-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK23-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK23-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK23-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK23-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK23-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: // CHECK23-NEXT: br label [[OMP_PRECOND_END]] @@ -7792,17 +8067,20 @@ // CHECK23-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7812,73 +8090,75 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK23-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK23-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK23-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK23-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 100, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp @@ -179,25 +179,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -206,107 +212,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD9]], ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK1-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr 
[[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD5]], ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK1-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -350,6 +358,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -363,20 +372,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -385,107 +399,109 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD9]], ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK3-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr 
[[TMP27]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD5]], ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK3-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK3-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -690,6 +706,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -699,21 +716,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -722,124 +745,126 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: 
// CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX12]], ptr align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1025,6 +1050,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1033,20 +1059,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1055,119 +1086,121 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1376,6 +1409,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 
@@ -1385,21 +1419,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1408,122 +1448,124 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call 
void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // 
CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP21]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 
[[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1709,6 +1751,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1717,20 +1760,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1739,117 +1787,119 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 
+// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// 
CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp @@ -171,25 +171,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -198,106 +204,108 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// 
CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP26]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[SFVAR7]], align 4 -// 
CHECK1-NEXT: store float [[TMP29]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP30]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: store volatile double [[TMP32]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP34]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -339,6 +347,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -352,20 +361,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -374,106 +388,108 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: 
[[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP25]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP26]], align 4 -// CHECK3-NEXT: store volatile double [[TMP27]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP29]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP30]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store volatile double [[TMP32]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP34]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -677,6 +693,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], 
ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -686,21 +703,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -709,33 +732,35 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = 
alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -745,106 +770,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// 
CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr 
[[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP34]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1030,6 +1055,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1038,20 +1064,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1060,30 +1091,32 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1093,104 +1126,104 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 
8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP32]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: 
[[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1400,6 +1433,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1409,21 +1443,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1432,33 +1472,35 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1468,104 +1510,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done13: -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP34]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] 
] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done15: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // @@ -1751,6 +1793,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1759,20 +1802,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1781,30 +1829,32 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// 
CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1814,102 +1864,102 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP32]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_private_codegen.cpp @@ -131,15 +131,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -156,69 +158,71 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -249,15 +253,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -274,69 +280,71 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -529,15 +537,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -554,6 +564,8 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -571,72 +583,72 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -659,76 +671,81 @@ // CHECK9-SAME: (i64 noundef [[I:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[TMP2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void @@ -738,10 +755,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: 
[[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -749,9 +766,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -789,12 +806,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -864,15 +881,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -882,96 +901,98 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 
4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // 
CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] @@ -995,7 +1016,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1008,7 +1029,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1182,15 +1203,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1207,6 +1230,8 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1224,70 +1249,70 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] 
-// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP7]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP13:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1310,76 +1335,81 @@ // CHECK11-SAME: (i32 noundef [[I:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], 
[[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I1]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK11-NEXT: store i32 2, ptr [[TMP2]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: ret void @@ -1389,10 +1419,10 
@@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1400,9 +1430,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1440,12 +1470,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1515,15 +1545,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1533,94 +1565,96 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], 
align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group 
[[ACC_GRP16]] -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: 
[[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1644,7 +1678,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1657,7 +1691,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp @@ -145,88 +145,93 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -294,88 +299,93 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] 
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -449,88 +459,93 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: 
[[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -598,88 +613,93 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = 
phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: 
[[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -902,92 +922,97 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi 
i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// 
CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp @@ -420,6 +420,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -428,22 +429,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -514,14 +523,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -530,121 +536,123 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR7]], ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], 
[[TMP19]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX12]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP23]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr 
[[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP25]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP30]]) // CHECK1-NEXT: ret void // // @@ -866,11 +874,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -881,78 +889,80 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[G1]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: 
[[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SIVAR3]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile i32 2, ptr [[TMP15]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -1108,18 +1118,21 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1130,57 +1143,59 @@ // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[G1]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[G1]], align 4 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// 
CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP13]], align 4 -// CHECK4-NEXT: store i32 2, ptr [[SIVAR3]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 +// CHECK4-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1192,30 +1207,30 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: 
[[TMP14:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP14]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP15]], ptr [[BLOCK_CAPTURED5]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK4-NEXT: call void [[TMP18]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP16:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP16]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP17]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], ptr [[BLOCK_CAPTURED5]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK4-NEXT: call void [[TMP20]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -369,6 +369,10 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(24) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -379,26 +383,36 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_3]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -438,44 +452,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -485,102 +497,102 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP6]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR5]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], 
align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 
dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done13: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done8: +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP10]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done16: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -595,10 +607,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -610,74 +623,77 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr @f, align 4 -// CHECK1-NEXT: store float [[TMP0]], ptr [[F]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr @f, align 4 +// CHECK1-NEXT: store float [[TMP1]], ptr [[F]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr [[X]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: store double [[TMP13]], ptr @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[F]], align 4 -// CHECK1-NEXT: store float [[TMP14]], ptr @f, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: store double [[TMP14]], ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[F]], align 4 +// CHECK1-NEXT: store float [[TMP15]], ptr @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// 
CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -687,76 +703,79 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 4, ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr @f, align 4 -// CHECK1-NEXT: store float [[TMP2]], ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr @f, align 4 +// CHECK1-NEXT: store float [[TMP3]], ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr @_ZN1A1xE, align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP13]], ptr @f, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) 
+// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -767,70 +786,72 @@ // CHECK1-NEXT: [[CNT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 1, ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 1, ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr @_ZN1A1xE, align 8 // CHECK1-NEXT: store float 0.000000e+00, ptr [[F]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: // CHECK1-NEXT: store i8 2, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP12]], ptr @cnt, align 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[F]], align 4 -// CHECK1-NEXT: store float [[TMP13]], ptr @f, align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP13]], ptr @cnt, align 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[F]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_free(i32 
[[TMP1]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -838,37 +859,46 @@ // CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 128 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 128 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..5, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 128 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2
+// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]]
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
// CHECK1: arraydestroy.done1:
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK1-NEXT: ret i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK1-NEXT: ret i32 [[TMP6]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi
@@ -876,6 +906,7 @@
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
@@ -899,7 +930,9 @@
// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[C]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[THIS1]])
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -907,44 +940,44 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK1-NEXT: [[B6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[BF_LOAD7:%.*]] = load i8, ptr [[B6]], align 8 // CHECK1-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD7]], 4 // CHECK1-NEXT: [[BF_ASHR:%.*]] 
= ashr i8 [[BF_SHL]], 4 // CHECK1-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK1-NEXT: [[TMP11:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK1-NEXT: [[TMP12:%.*]] = trunc i32 [[DEC]] to i8 // CHECK1-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B6]], align 8 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP11]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP12]], 15 // CHECK1-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK1-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET10]], ptr [[B6]], align 8 @@ -952,16 +985,16 @@ // CHECK1-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK1-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 // CHECK1-NEXT: [[C11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[C11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[C11]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP13]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -973,11 +1006,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1001,110 +1034,112 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[E1:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK1-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK1-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[A9]], ptr [[_TMP10]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 // CHECK1-NEXT: store ptr [[C12]], ptr [[_TMP13]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP19]], -1 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP21]], -1 // CHECK1-NEXT: store i32 [[DEC]], ptr [[B11]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP20]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP22]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[B]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load [4 x i8], ptr [[TMP30]], align 1 -// CHECK1-NEXT: store [4 x i8] [[TMP31]], ptr [[TMP10]], align 1 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK1-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i8 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load [4 x i8], ptr [[TMP32]], align 1 +// CHECK1-NEXT: store [4 x i8] [[TMP33]], ptr [[TMP12]], align 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK1-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 // CHECK1-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B16]], align 8 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP33]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP35]], 15 // CHECK1-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK1-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET]], ptr [[B16]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 
[[TMP9]]) // CHECK1-NEXT: ret void // // @@ -1176,14 +1211,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1192,133 +1224,135 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], 
i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: 
cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP1]], ptr align 128 [[VEC4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 128 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 128 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP25]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP30]]) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN16:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN16]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // 
CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN16]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done17: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP33]]) // CHECK1-NEXT: ret void // // @@ -1338,7 +1372,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -1347,6 +1381,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1361,7 +1396,9 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1369,41 +1406,41 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: 
[[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -1415,11 +1452,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1433,71 +1470,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP13]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1509,7 +1548,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1556,6 +1595,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1580,7 +1620,9 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[THIS1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1588,43 +1630,43 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 
+// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE0_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1636,11 +1678,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1665,110 +1707,112 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK3-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// 
CHECK3-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK3-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[A9]], ptr [[_TMP10]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 // CHECK3-NEXT: store ptr [[C12]], ptr [[_TMP13]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B11]], ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B11]], ptr [[TMP22]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK3-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label 
[[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], ptr [[B]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP32:%.*]] = load [4 x i8], ptr [[TMP31]], align 1 -// CHECK3-NEXT: store [4 x i8] [[TMP32]], ptr [[TMP10]], align 1 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK3-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP34:%.*]] = trunc i32 [[TMP33]] to i8 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load [4 x i8], ptr [[TMP33]], align 1 +// CHECK3-NEXT: store [4 x i8] [[TMP34]], ptr [[TMP12]], align 1 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK3-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP36:%.*]] = trunc i32 [[TMP35]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B16]], align 8 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP34]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP36]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], ptr [[B16]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) // CHECK3-NEXT: ret void // // @@ -1776,6 +1820,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1795,13 +1840,21 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: 
[[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], ptr [[TMP12]], ptr [[TMP14]], ptr [[TMP16]]) +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // @@ -1809,6 +1862,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR2]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1838,21 +1892,22 @@ // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 2 // CHECK3-NEXT: store i32 [[MUL]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], ptr [[TMP9]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1863,111 +1918,112 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B7:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[A5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP18]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP24]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] 
= icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1981,95 +2037,97 @@ // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[B3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[B3]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B3]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP16]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B3]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP17]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: // CHECK3-NEXT: store i32 2, ptr [[B3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B3]], align 4 -// CHECK3-NEXT: store i32 [[TMP19]], ptr [[B]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[B3]], align 4 -// CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B7]], align 8 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP21]], 15 +// CHECK3-NEXT: 
[[BF_VALUE:%.*]] = and i8 [[TMP24]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], ptr [[B7]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2080,86 +2138,88 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr @g1, align 8 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 128 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile i32 1, ptr [[TMP11]], align 4 -// CHECK3-NEXT: store i32 2, ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SIVAR3]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile i32 1, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], 
i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[G]], align 128 -// CHECK3-NEXT: store volatile i32 [[TMP19]], ptr @g, align 128 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP21]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 128 +// CHECK3-NEXT: store volatile i32 [[TMP21]], ptr @g, align 128 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP23]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2208,18 +2268,21 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2230,55 +2293,57 @@ // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr @g1, align 8 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 128 -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP11]], align 4 -// CHECK4-NEXT: store i32 2, ptr [[SIVAR3]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, ptr [[TMP13]], align 4 +// CHECK4-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 128 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP12]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, ptr [[TMP14]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2290,43 +2355,43 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: 
store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load volatile i32, ptr [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP13]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP14]], ptr [[BLOCK_CAPTURED5]], align 32 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], ptr [[BLOCK_CAPTURED6]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK4-NEXT: call void [[TMP17]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP15]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP16]], ptr [[BLOCK_CAPTURED4]], align 32 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], ptr [[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK4-NEXT: call void [[TMP19]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK4-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK4-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: 
.omp.lastprivate.then: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP21]], ptr @g, align 128 -// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP23]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP23]], ptr @g, align 128 +// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK4-NEXT: ret void // // @@ -2352,6 +2417,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2376,7 +2442,9 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[THIS1]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -2384,31 +2452,31 @@ // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2422,17 +2490,17 @@ // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 5 // CHECK4-NEXT: store ptr [[THIS1]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, 
ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: store ptr [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK4-NEXT: call void [[TMP11]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: call void [[TMP12]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2444,11 +2512,11 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -2473,59 +2541,61 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK4-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK4-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], 
align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK4-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK4-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK4-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK4-NEXT: store ptr [[A9]], ptr [[_TMP10]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 // CHECK4-NEXT: store ptr [[C12]], ptr [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label 
[[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 @@ -2539,57 +2609,57 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK4-NEXT: store ptr [[TMP17]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK4-NEXT: store ptr [[TMP19]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED15:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK4-NEXT: store i32 [[TMP18]], ptr [[BLOCK_CAPTURED15]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], ptr [[BLOCK_CAPTURED15]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK4-NEXT: store ptr [[TMP19]], ptr [[BLOCK_CAPTURED16]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 -// 
CHECK4-NEXT: call void [[TMP21]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK4-NEXT: store ptr [[TMP21]], ptr [[BLOCK_CAPTURED16]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK4-NEXT: call void [[TMP23]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK4-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK4-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK4-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK4-NEXT: store i32 [[TMP26]], ptr [[TMP8]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK4-NEXT: store i32 [[TMP27]], ptr [[B]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK4-NEXT: store i32 [[TMP29]], ptr [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load [4 x i8], ptr [[TMP30]], align 1 -// CHECK4-NEXT: store [4 x i8] [[TMP31]], ptr [[TMP10]], align 1 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK4-NEXT: [[B18:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i8 +// CHECK4-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK4-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK4-NEXT: store i32 [[TMP29]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK4-NEXT: store i32 [[TMP31]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load [4 x i8], ptr [[TMP32]], align 1 +// CHECK4-NEXT: store [4 x i8] [[TMP33]], ptr [[TMP12]], align 1 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK4-NEXT: [[B18:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 
0, i32 2 +// CHECK4-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B18]], align 8 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP33]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP35]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], ptr [[B18]], align 8 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP9]]) // CHECK4-NEXT: ret void // // @@ -2598,6 +2668,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -2616,24 +2687,29 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 +// CHECK4-NEXT: store ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], ptr [[TMP5]], ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2644,101 +2720,103 @@ // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B7:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], 
i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[A5]], ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP18]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP24]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK4-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK4-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK4-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK4-NEXT: store i32 
[[TMP25]], ptr [[TMP6]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK4-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK4-NEXT: store i32 [[TMP28]], ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK4-NEXT: store i32 [[TMP30]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP31]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK4-NEXT: store i32 [[TMP33]], ptr [[TMP12]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP14]]) // CHECK4-NEXT: ret void // // @@ -2747,6 +2825,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -2776,19 +2855,22 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR2]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, ptr [[THIS]], ptr [[TMP5]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR2]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2802,83 +2884,85 @@ // CHECK4-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: store ptr [[_TMP2]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store ptr [[C]], ptr [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP11]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 -// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 2 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B]], align 8 // CHECK4-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 4 // CHECK4-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK4-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK4-NEXT: [[TMP14:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK4-NEXT: [[TMP17:%.*]] = trunc i32 [[DEC]] to i8 // CHECK4-NEXT: [[BF_LOAD6:%.*]] = load i8, ptr [[B]], align 8 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP14]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP17]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD6]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store 
i8 [[BF_SET]], ptr [[B]], align 8 // CHECK4-NEXT: [[BF_RESULT_SHL:%.*]] = shl i8 [[BF_VALUE]], 4 // CHECK4-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK4-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 -// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) // CHECK4-NEXT: ret void // // @@ -2892,6 +2976,10 @@ // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(24) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -2902,26 +2990,36 @@ // CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..3) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_3]]) // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK5-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done1: +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done4: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP1]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP6]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -2961,44 +3059,42 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -3008,102 +3104,102 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP6]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[SIVAR5]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], 
align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 
dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done8: +// CHECK5-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP31]], ptr [[TMP10]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: -// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done11: +// CHECK5-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP34]]) // CHECK5-NEXT: ret void // // @@ -3118,10 +3214,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3133,74 +3230,77 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load float, ptr @f, align 4 -// CHECK5-NEXT: store float [[TMP0]], ptr [[F]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load float, ptr @f, align 4 +// CHECK5-NEXT: store float [[TMP1]], ptr [[F]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load double, ptr [[X]], align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK5-NEXT: [[TMP10:%.*]] = load double, ptr [[X]], align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], ptr [[X]], align 8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 8 -// CHECK5-NEXT: store double [[TMP13]], ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[F]], align 4 -// CHECK5-NEXT: store float [[TMP14]], ptr @f, align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[X]], align 8 +// CHECK5-NEXT: store double [[TMP14]], ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[F]], align 4 +// CHECK5-NEXT: store float [[TMP15]], ptr @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// 
CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3210,76 +3310,79 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 4, ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: [[TMP2:%.*]] = load float, ptr @f, align 4 -// CHECK5-NEXT: store float [[TMP2]], ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: [[TMP3:%.*]] = load float, ptr @f, align 4 +// CHECK5-NEXT: store float [[TMP3]], ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK5-NEXT: [[TMP10:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], ptr @_ZN1A1xE, align 8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK5-NEXT: store float [[TMP13]], ptr @f, align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK5-NEXT: store float [[TMP14]], ptr @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) 
+// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3290,88 +3393,90 @@ // CHECK5-NEXT: [[CNT:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 1, ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 1 -// CHECK5-NEXT: store i8 0, ptr [[TMP2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 0 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 1, ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 1 +// CHECK5-NEXT: store i8 0, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 0 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK5-NEXT: store i8 [[CONV]], ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: [[TMP10:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 +// CHECK5-NEXT: [[TMP11:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: store float 0.000000e+00, ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP1]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp sle i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: br i1 [[TMP13]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK5-NEXT: store float 0.000000e+00, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK5: lp_cond_then: -// CHECK5-NEXT: store i32 [[TMP11]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK5-NEXT: store float [[TMP14]], ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store float [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 4 // CHECK5-NEXT: br label [[LP_COND_EXIT]] // CHECK5: 
lp_cond_exit: -// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP1]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: store i8 2, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: store i8 [[TMP18]], ptr @cnt, align 1 -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr @{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: store float [[TMP19]], ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK5-NEXT: store float [[TMP20]], ptr @f, align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 +// CHECK5-NEXT: store i8 [[TMP19]], ptr @cnt, align 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: store float [[TMP20]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store float [[TMP21]], ptr @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // @@ -3379,37 +3484,46 @@ // CHECK5-SAME: () #[[ATTR7:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK5-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK5-NEXT: [[S_ARR:%.*]] 
= alloca [2 x %struct.S.0], align 128 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 // CHECK5-NEXT: [[VAR:%.*]] = alloca ptr, align 128 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK5-NEXT: store ptr [[TEST]], ptr [[VAR]], align 128 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 128 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..5, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 128 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done1: // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP2]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP6]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -3417,6 +3531,7 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -3440,7 +3555,9 @@ // CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[THIS1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -3448,44 +3565,44 @@ // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK5-NEXT: [[B6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK5-NEXT: [[BF_LOAD7:%.*]] = load i8, ptr [[B6]], align 8 // CHECK5-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD7]], 4 // CHECK5-NEXT: 
[[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK5-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK5-NEXT: [[TMP11:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK5-NEXT: [[TMP12:%.*]] = trunc i32 [[DEC]] to i8 // CHECK5-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B6]], align 8 -// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP11]], 15 +// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP12]], 15 // CHECK5-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK5-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK5-NEXT: store i8 [[BF_SET10]], ptr [[B6]], align 8 @@ -3493,16 +3610,16 @@ // CHECK5-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK5-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 // CHECK5-NEXT: [[C11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[C11]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[C11]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP13]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: @@ -3514,11 +3631,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -3542,110 +3659,112 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: 
[[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK5-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK5-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK5-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], align 8 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK5-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK5-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK5-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK5-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK5-NEXT: store ptr [[A9]], ptr [[_TMP10]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 // CHECK5-NEXT: store ptr [[C12]], ptr [[_TMP13]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], 
ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP17]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP19]], -1 +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP19]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP21]], -1 // CHECK5-NEXT: store i32 [[DEC]], ptr [[B11]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP21]], 1 -// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP20]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP23]], 1 +// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP22]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK5-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], ptr [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK5-NEXT: store i32 [[TMP27]], ptr [[B]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = load [4 x i8], ptr [[TMP30]], align 1 -// CHECK5-NEXT: store [4 x i8] [[TMP31]], ptr [[TMP10]], align 1 -// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK5-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i8 +// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK5-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[B]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK5-NEXT: store i32 [[TMP31]], ptr [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK5-NEXT: [[TMP33:%.*]] = load [4 x i8], ptr [[TMP32]], align 1 +// CHECK5-NEXT: store [4 x i8] [[TMP33]], ptr [[TMP12]], align 1 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK5-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 // CHECK5-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B16]], align 8 -// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP33]], 15 +// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP35]], 15 // CHECK5-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK5-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK5-NEXT: store i8 [[BF_SET]], ptr [[B16]], align 8 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 
[[TMP9]]) // CHECK5-NEXT: ret void // // @@ -3717,14 +3836,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3733,133 +3849,135 @@ // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 128 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 128 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], 
i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: 
cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 128 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP1]], ptr align 128 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK5-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 128 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 128 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP29]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP25]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP30]]) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN16:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN16]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // 
CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN16]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done17: -// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP33]]) // CHECK5-NEXT: ret void // // @@ -3879,7 +3997,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -3888,6 +4006,7 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -3902,7 +4021,9 @@ // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[THIS1]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -3910,41 +4031,41 @@ // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: 
[[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: @@ -3956,11 +4077,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3974,71 +4095,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK5-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP13]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK5-NEXT: store i32 [[TMP17]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP4]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK5-NEXT: ret void // // @@ -4050,7 +4173,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp --- a/clang/test/OpenMP/for_linear_codegen.cpp +++ b/clang/test/OpenMP/for_linear_codegen.cpp @@ -192,18 +192,23 @@ // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(16) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8 // CHECK1-NEXT: store i64 0, ptr [[LVAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[PVAR]], ptr [[LVAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[LVAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP0]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP2]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -230,12 +235,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[LVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca ptr, align 8 @@ -245,112 +249,119 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP1]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTLINEAR_START1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 
4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]]) -// CHECK1-NEXT: [[DOTLVAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP5]], i64 8, ptr inttoptr (i64 5 to ptr)) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]]) +// CHECK1-NEXT: [[DOTLVAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP8]], i64 8, ptr inttoptr (i64 5 to ptr)) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP13]], 3 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL4]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP15]], 3 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL5]] to i64 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP14]], [[CONV]] -// CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTLVAR__VOID_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[ADD_PTR7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 3 -// CHECK1-NEXT: store ptr [[ADD_PTR7]], ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP17]], 3 -// CHECK1-NEXT: store i64 [[ADD8]], ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 3 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 3 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL4]] to i64 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP17]], [[CONV]] +// CHECK1-NEXT: store i64 [[ADD5]], ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 3 +// CHECK1-NEXT: store ptr [[ADD_PTR6]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP20]], 3 +// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTLVAR__VOID_ADDR]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 
[[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP4]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP5]], ptr [[DOTLVAR__VOID_ADDR]], ptr inttoptr (i64 5 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP8]], ptr [[DOTLVAR__VOID_ADDR]], ptr inttoptr (i64 5 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TEST]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8 -// CHECK1-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TEST]], i32 0, i32 0 +// CHECK1-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F1]], ptr [[LVAR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[LVAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[PVAR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: ret i32 0 // @@ -370,6 +381,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -383,16 +395,18 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C:%.*]] = alloca ptr, align 8 @@ -417,124 +431,126 @@ // CHECK1-NEXT: [[_TMP21:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK1-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C2]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP3]], align 8 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C2]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 // CHECK1-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK1-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP25]], -1 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = 
add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 // CHECK1-NEXT: store i32 [[DEC]], ptr [[B9]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP28]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP31]], ptr [[_TMP20]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP33]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK1-NEXT: store i32 [[TMP34]], ptr [[B]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP35]], ptr [[_TMP21]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP21]], align 8 -// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP37]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[B]], align 4 -// CHECK1-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP33]], ptr [[_TMP20]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP20]], align 8 +// CHECK1-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr 
[[TMP37]], ptr [[_TMP21]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP21]], align 8 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP41:%.*]] = trunc i32 [[TMP40]] to i8 // CHECK1-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B22]], align 4 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP39]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP41]], 15 // CHECK1-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK1-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET]], ptr [[B22]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -570,12 +586,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[LVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -587,101 +602,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP10:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: store i32 
[[TMP5]], ptr [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START3]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: store ptr [[LVAR5]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: store ptr [[LVAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL8]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR4]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTLINEAR_START3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], [[MUL9]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[LVAR5]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[PVAR4]], align 8 -// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 -// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR4]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL6]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP22]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR4]], align 8 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[_TMP12]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[LVAR5]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP12]], align 8 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[_TMP10]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP30]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -701,7 +718,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -710,20 +727,23 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -739,81 +759,83 @@ // CHECK1-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[MUL6]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[A3]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP16]], align 4 // 
CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP9]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: ret void // // @@ -865,6 +887,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -878,16 +901,18 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[THIS1]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C:%.*]] = alloca ptr, align 8 @@ -913,124 +938,126 @@ // CHECK3-NEXT: [[_TMP21:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK3-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C2]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C2]], align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]]) // CHECK3-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 // CHECK3-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK3-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK3-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B9]], ptr [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B9]], ptr [[TMP28]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP11]], align 8 +// CHECK3-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK3-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP32]], ptr [[_TMP20]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK3-NEXT: store i32 [[TMP35]], ptr [[B]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP36]], ptr [[_TMP21]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP21]], align 8 -// CHECK3-NEXT: store i32 [[TMP37]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP40:%.*]] = trunc i32 [[TMP39]] to i8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP34]], ptr [[_TMP20]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[A7]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP20]], align 8 +// CHECK3-NEXT: store i32 [[TMP35]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK3-NEXT: store i32 [[TMP37]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP38]], ptr [[_TMP21]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[C10]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP21]], align 8 +// CHECK3-NEXT: store i32 [[TMP39]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP42:%.*]] = trunc i32 [[TMP41]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B22]], align 4 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP40]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP42]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], ptr [[B22]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void 
@__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -1038,6 +1065,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1057,25 +1085,30 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], ptr [[TMP12]], ptr [[TMP14]], ptr [[TMP16]]) +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1090,136 +1123,139 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A7:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B9:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C10:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP11:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[_TMP20:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[_TMP21:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[_TMP17:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[_TMP18:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// 
CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_START6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 -// CHECK3-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP7]], align 8 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP8]], align 8 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: 
cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] -// CHECK3-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B9]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// 
CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[MUL10]] +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], [[MUL12]] +// CHECK3-NEXT: store i32 [[ADD13]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[MUL14]] +// CHECK3-NEXT: store i32 [[ADD15]], ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP32]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP33]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK3-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP33]], ptr [[_TMP20]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK3-NEXT: store i32 [[TMP36]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP37]], ptr [[_TMP21]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], 
align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP21]], align 8 -// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP38]], ptr [[_TMP17]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP17]], align 8 +// CHECK3-NEXT: store i32 [[TMP39]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP41]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP42]], ptr [[_TMP18]], align 8 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load ptr, ptr [[_TMP18]], align 8 +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[TMP44]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1233,98 +1269,100 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK3-NEXT: [[_TMP12:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START2]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 5 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], [[MUL5]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP14]], 5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], [[MUL7]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 5 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[MUL7]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[G1]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[G]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 5 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load volatile i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 5 -// CHECK3-NEXT: store volatile i32 [[ADD10]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK3-NEXT: store volatile i32 [[ADD10]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// 
CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], ptr @g, align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[_TMP12]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP12]], align 8 -// CHECK3-NEXT: store volatile i32 [[TMP26]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr @g, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP26]], ptr [[_TMP12]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP12]], align 8 +// CHECK3-NEXT: store volatile i32 [[TMP27]], ptr [[TMP28]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: ret void // // @@ -1358,17 +1396,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1386,65 +1426,67 @@ // CHECK4-NEXT: [[_TMP13:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START2]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START2]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]]) // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 5 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], [[MUL5]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], [[MUL5]] // CHECK4-NEXT: store i32 [[ADD6]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP14]], 5 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], [[MUL7]] +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 5 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[MUL7]] // CHECK4-NEXT: store i32 [[ADD8]], ptr [[G1]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[G]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 5 // CHECK4-NEXT: store i32 [[ADD9]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load volatile i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 5 -// CHECK4-NEXT: store volatile i32 [[ADD10]], ptr [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = load volatile i32, ptr [[TMP17]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK4-NEXT: store volatile i32 [[ADD10]], ptr [[TMP17]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// 
CHECK4-NEXT: store volatile i32 5, ptr [[TMP18]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store volatile i32 5, ptr [[TMP19]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1456,40 +1498,40 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP19:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP19]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP20]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED11:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP20]], ptr [[BLOCK_CAPTURED11]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK4-NEXT: call void [[TMP22]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP21]], ptr [[BLOCK_CAPTURED11]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK4-NEXT: call void [[TMP23]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK4-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK4-NEXT: br i1 [[TMP25]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK4-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 [[TMP26]], ptr @g, align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = 
load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP27]], ptr [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK4-NEXT: store volatile i32 [[TMP28]], ptr [[TMP29]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: store i32 [[TMP27]], ptr @g, align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP28]], ptr [[_TMP13]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK4-NEXT: store volatile i32 [[TMP29]], ptr [[TMP30]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK4-NEXT: ret void // // @@ -1513,6 +1555,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1526,16 +1569,18 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[THIS1]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C:%.*]] = alloca ptr, align 8 @@ -1561,73 +1606,75 @@ // CHECK4-NEXT: [[_TMP23:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK4-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C2]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C2]], align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store 
i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 // CHECK4-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// 
CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK4-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK4-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK4-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK4-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK4-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 @@ -1640,58 +1687,58 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK4-NEXT: store ptr [[TMP23]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK4-NEXT: store ptr [[TMP25]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP24]], ptr [[BLOCK_CAPTURED19]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], ptr [[BLOCK_CAPTURED19]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED20:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: 
[[TMP25:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK4-NEXT: store ptr [[TMP25]], ptr [[BLOCK_CAPTURED20]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK4-NEXT: call void [[TMP27]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP11]], align 8 +// CHECK4-NEXT: store ptr [[TMP27]], ptr [[BLOCK_CAPTURED20]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 +// CHECK4-NEXT: call void [[TMP29]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK4-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK4-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP31]], ptr [[_TMP22]], align 8 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP22]], align 8 -// CHECK4-NEXT: store i32 [[TMP32]], ptr [[TMP33]], align 4 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP34]], ptr [[B]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP35]], ptr [[_TMP23]], align 8 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP23]], align 8 -// CHECK4-NEXT: store i32 [[TMP36]], ptr [[TMP37]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[B]], align 4 -// CHECK4-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i8 +// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP33]], ptr [[_TMP22]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP22]], align 8 +// CHECK4-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK4-NEXT: store i32 [[TMP36]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store 
ptr [[TMP37]], ptr [[_TMP23]], align 8 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP23]], align 8 +// CHECK4-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP41:%.*]] = trunc i32 [[TMP40]] to i8 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B24]], align 4 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP39]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP41]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], ptr [[B24]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: ret void // // @@ -1700,6 +1747,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -1718,24 +1766,29 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 +// CHECK4-NEXT: store ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], ptr [[TMP5]], ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1750,128 +1803,130 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A7:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B9:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C10:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP11:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[_TMP20:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[_TMP21:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[_TMP17:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[_TMP18:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_START6]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK4-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 -// CHECK4-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP7]], align 8 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP8]], align 8 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] -// CHECK4-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] -// CHECK4-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] -// CHECK4-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP25]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B9]], align 4 -// 
CHECK4-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP28]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[MUL10]] +// CHECK4-NEXT: store i32 [[ADD11]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], [[MUL12]] +// CHECK4-NEXT: store i32 [[ADD13]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP29]], 1 +// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[MUL14]] +// CHECK4-NEXT: store i32 [[ADD15]], ptr [[C]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP30]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP32]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP34]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP33]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK4-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK4-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK4-NEXT: br i1 [[TMP37]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP33]], ptr [[_TMP20]], align 8 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK4-NEXT: store i32 [[TMP34]], ptr 
[[TMP35]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP36]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP37]], ptr [[_TMP21]], align 8 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK4-NEXT: [[C22:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP39:%.*]] = load ptr, ptr [[C22]], align 8 -// CHECK4-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK4-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP38]], ptr [[_TMP17]], align 8 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP17]], align 8 +// CHECK4-NEXT: store i32 [[TMP39]], ptr [[TMP40]], align 4 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP41]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP42]], ptr [[_TMP18]], align 8 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[C]], align 4 +// CHECK4-NEXT: [[C19:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP44:%.*]] = load ptr, ptr [[C19]], align 8 +// CHECK4-NEXT: store i32 [[TMP43]], ptr [[TMP44]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/for_private_codegen.cpp b/clang/test/OpenMP/for_private_codegen.cpp --- a/clang/test/OpenMP/for_private_codegen.cpp +++ b/clang/test/OpenMP/for_private_codegen.cpp @@ -116,7 +116,9 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -126,8 +128,8 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -138,8 +140,8 @@ // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP1]] @@ -169,10 +171,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -189,6 +192,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -206,73 +211,73 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -287,10 +292,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -301,50 +307,52 @@ // CHECK1-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -352,27 +360,28 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -440,10 +449,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -453,96 +463,98 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr 
[[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ 
[[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -562,7 +574,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -575,7 +587,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -601,10 +613,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -621,67 +634,69 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 // CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call 
void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -700,17 +715,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -727,42 +744,44 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 // CHECK4-NEXT: store i32 2, ptr [[SVAR]], align 4 // CHECK4-NEXT: store float 3.000000e+00, ptr [[SFVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 0 @@ -776,33 +795,33 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP10]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP11]], ptr [[BLOCK_CAPTURED4]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[SVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr 
[[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], ptr [[BLOCK_CAPTURED5]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load float, ptr [[SFVAR]], align 4 -// CHECK4-NEXT: store float [[TMP12]], ptr [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK4-NEXT: call void [[TMP14]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP13:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK4-NEXT: store float [[TMP13]], ptr [[BLOCK_CAPTURED6]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -542,8 +542,21 @@ // CHECK1-NEXT: [[VAR2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S], align 16 // CHECK1-NEXT: [[VAR3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_10:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_13:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_14:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_15:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_16:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_17:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_18:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_19:%.*]] = alloca [[STRUCT_ANON_9:%.*]], 
align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_20:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_21:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 @@ -581,70 +594,124 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE8]], label [[ARRAYCTOR_CONT9:%.*]], label [[ARRAYCTOR_LOOP5]] // CHECK1: arrayctor.cont9: // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[VAR3]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 6, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[TMP0]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 10, [[TMP2]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP4]], align 16 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 10, i64 [[TMP2]], ptr [[VLA]], ptr [[VEC]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i64 10, i64 [[TMP2]], ptr [[VLA]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..5, i64 10, i64 [[TMP2]], ptr [[VLA]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[VVAR2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[TMP5]]) -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[TMP6]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[TMP7]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[TMP8]]) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() -// CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP9]]) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 10, [[TMP8]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP10]], align 16 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_10]]) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_12]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_13]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_13]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_14]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_14]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_15]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_15]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_16]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_17]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_17]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_18]]) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_19]]) +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[OMP_OUTLINED_ARG_AGG_20]]) +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_21]]) +// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() +// CHECK1-NEXT: store i32 [[CALL22]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP36]]) +// CHECK1-NEXT: [[ARRAY_BEGIN23:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN23]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP37]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 40 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY14:%.*]] -// CHECK1: arraydestroy.body14: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST15:%.*]] = phi ptr [ [[TMP11]], [[ARRAYDESTROY_DONE12]] ], [ [[ARRAYDESTROY_ELEMENT16:%.*]], [[ARRAYDESTROY_BODY14]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT16]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST15]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT16]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE17:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT16]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE17]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY14]] -// CHECK1: arraydestroy.done18: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN19]], i64 4 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY20:%.*]] -// CHECK1: arraydestroy.body20: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi ptr [ [[TMP12]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT22]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST21]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT22]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE23:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT22]], [[ARRAY_BEGIN19]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE23]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY20]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN23]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done24: +// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN25]], i64 40 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY26:%.*]] +// CHECK1: arraydestroy.body26: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST27:%.*]] = phi ptr [ [[TMP38]], [[ARRAYDESTROY_DONE24]] ], [ [[ARRAYDESTROY_ELEMENT28:%.*]], [[ARRAYDESTROY_BODY26]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT28]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST27]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT28]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE29:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT28]], [[ARRAY_BEGIN25]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE29]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY26]] +// CHECK1: arraydestroy.done30: +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN31:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN31]], i64 4 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY32:%.*]] +// CHECK1: arraydestroy.body32: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST33:%.*]] = phi ptr [ [[TMP39]], [[ARRAYDESTROY_DONE30]] ], [ [[ARRAYDESTROY_ELEMENT34:%.*]], [[ARRAYDESTROY_BODY32]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT34]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST33]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT34]]) #[[ATTR5]] +// CHECK1-NEXT: 
[[ARRAYDESTROY_DONE35:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT34]], [[ARRAY_BEGIN31]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE35]], label [[ARRAYDESTROY_DONE36:%.*]], label [[ARRAYDESTROY_BODY32]] +// CHECK1: arraydestroy.done36: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP13]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP40]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -671,16 +738,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[S_ARR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -689,198 +751,200 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR16:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR17:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP19:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[_TMP31:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], 
align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// 
CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], 
ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP12]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP25]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR16]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR17]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call 
i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = fadd float [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store float [[ADD12]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd float [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store float [[ADD8]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = fcmp une float [[CALL14]], 0.000000e+00 +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = fcmp une float [[CALL10]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP28:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL15]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV16:%.*]] = uitofp i1 [[TMP28]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV16]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP35:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL11]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV12:%.*]] = uitofp i1 [[TMP35]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV12]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP29:%.*]] = load 
float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP17:%.*]] = fcmp olt float [[TMP29]], [[TMP30]] -// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] -// CHECK1: cond.true18: -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: br label [[COND_END20:%.*]] -// CHECK1: cond.false19: -// CHECK1-NEXT: [[TMP32:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END20]] -// CHECK1: cond.end20: -// CHECK1-NEXT: [[COND21:%.*]] = phi float [ [[TMP31]], [[COND_TRUE18]] ], [ [[TMP32]], [[COND_FALSE19]] ] -// CHECK1-NEXT: store float [[COND21]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP13:%.*]] = fcmp olt float [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] +// CHECK1: cond.true14: +// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: br label [[COND_END16:%.*]] +// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END16]] +// CHECK1: cond.end16: +// CHECK1-NEXT: [[COND17:%.*]] = phi float [ [[TMP38]], [[COND_TRUE14]] ], [ [[TMP39]], [[COND_FALSE15]] ] +// CHECK1-NEXT: store float [[COND17]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP33]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL22]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL24:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]] -// CHECK1: land.rhs26: -// CHECK1-NEXT: [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END29]] -// CHECK1: land.end29: -// CHECK1-NEXT: [[TMP35:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ] -// CHECK1-NEXT: [[CONV30:%.*]] = uitofp i1 [[TMP35]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef 
[[CONV30]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP23]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP3]] monotonic, align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = atomicrmw fadd ptr [[TMP2]], float [[TMP40]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL18:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL18]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL21]], label [[LAND_RHS22:%.*]], label [[LAND_END25:%.*]] +// CHECK1: land.rhs22: +// CHECK1-NEXT: [[CALL23:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL24:%.*]] = fcmp une float [[CALL23]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END25]] +// CHECK1: land.end25: +// CHECK1-NEXT: [[TMP42:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL24]], [[LAND_RHS22]] ] +// CHECK1-NEXT: [[CONV26:%.*]] = uitofp i1 [[TMP42]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]], float noundef [[CONV26]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP19]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP43:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP8]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP37:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP45:%.*]], [[COND_END35:%.*]] ] -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast i32 [[TMP37]] to float -// CHECK1-NEXT: store float [[TMP38]], ptr [[_TMP31]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[_TMP31]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP32:%.*]] = fcmp olt float [[TMP39]], [[TMP40]] -// CHECK1-NEXT: br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]] -// CHECK1: cond.true33: -// CHECK1-NEXT: [[TMP41:%.*]] = load float, ptr [[_TMP31]], align 4 -// CHECK1-NEXT: br label [[COND_END35]] -// CHECK1: 
cond.false34: -// CHECK1-NEXT: [[TMP42:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END35]] -// CHECK1: cond.end35: -// CHECK1-NEXT: [[COND36:%.*]] = phi float [ [[TMP41]], [[COND_TRUE33]] ], [ [[TMP42]], [[COND_FALSE34]] ] -// CHECK1-NEXT: store float [[COND36]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = cmpxchg ptr [[TMP3]], i32 [[TMP37]], i32 [[TMP43]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP45]] = extractvalue { i32, i1 } [[TMP44]], 0 -// CHECK1-NEXT: [[TMP46:%.*]] = extractvalue { i32, i1 } [[TMP44]], 1 -// CHECK1-NEXT: br i1 [[TMP46]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP44:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END25]] ], [ [[TMP52:%.*]], [[COND_END31:%.*]] ] +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i32 [[TMP44]] to float +// CHECK1-NEXT: store float [[TMP45]], ptr [[_TMP27]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load float, ptr [[_TMP27]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = fcmp olt float [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP28]], label [[COND_TRUE29:%.*]], label [[COND_FALSE30:%.*]] +// CHECK1: cond.true29: +// CHECK1-NEXT: [[TMP48:%.*]] = load float, ptr [[_TMP27]], align 4 +// CHECK1-NEXT: br label [[COND_END31]] +// CHECK1: cond.false30: +// CHECK1-NEXT: [[TMP49:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END31]] +// CHECK1: cond.end31: +// CHECK1-NEXT: [[COND32:%.*]] = phi float [ [[TMP48]], [[COND_TRUE29]] ], [ [[TMP49]], [[COND_FALSE30]] ] +// CHECK1-NEXT: store float [[COND32]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = cmpxchg ptr [[TMP8]], i32 [[TMP44]], i32 [[TMP50]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP52]] = extractvalue { i32, i1 } [[TMP51]], 0 +// CHECK1-NEXT: [[TMP53:%.*]] = extractvalue { i32, i1 } [[TMP51]], 1 +// CHECK1-NEXT: br i1 [[TMP53]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP9]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -976,15 +1040,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(160) [[ARRS:%.*]]) 
#[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -998,232 +1058,234 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] -// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] -// 
CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA7:%.*]] = alloca i32, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA7]], [[TMP16]] +// CHECK1-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX3]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = add nuw i64 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP19]], align 16 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX8]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN11:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN11]] -// CHECK1-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX12]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY13]], i64 2 -// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64 -// CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = sdiv exact i64 [[TMP26]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[VLA15:%.*]] = alloca [[STRUCT_S]], i64 [[TMP28]], align 16 -// CHECK1-NEXT: store i64 [[TMP28]], ptr [[__VLA_EXPR1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA15]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY16:%.*]] = icmp eq ptr [[VLA15]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY16]], label [[OMP_ARRAYINIT_DONE21:%.*]], label [[OMP_ARRAYINIT_BODY17:%.*]] -// CHECK1: omp.arrayinit.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[VLA15]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYINIT_BODY17]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYINIT_DONE21]], label [[OMP_ARRAYINIT_BODY17]] -// CHECK1: omp.arrayinit.done21: -// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK1-NEXT: [[TMP32:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP33:%.*]] = sub i64 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: [[TMP34:%.*]] = sdiv exact i64 [[TMP33]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: 
[[TMP35:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA15]], i64 [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP37]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP38]], 9 +// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP25:%.*]] = sub i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = sdiv exact i64 [[TMP25]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP26]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX5]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 0, [[TMP29]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP10]], i64 0, i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX9]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY10]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 +// CHECK1-NEXT: [[TMP32:%.*]] = sub i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = sdiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP34:%.*]] = add nuw i64 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[VLA12:%.*]] = alloca [[STRUCT_S]], i64 [[TMP34]], align 16 +// CHECK1-NEXT: store i64 [[TMP34]], ptr [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA12]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY13:%.*]] = icmp eq ptr [[VLA12]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY13]], label [[OMP_ARRAYINIT_DONE18:%.*]], label [[OMP_ARRAYINIT_BODY14:%.*]] +// CHECK1: omp.arrayinit.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[VLA12]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYINIT_BODY14]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYINIT_DONE18]], label [[OMP_ARRAYINIT_BODY14]] +// CHECK1: omp.arrayinit.done18: +// CHECK1-NEXT: 
[[TMP37:%.*]] = ptrtoint ptr [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA12]], i64 [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP43]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP44]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP39]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP45]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP40]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP49]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP44]] -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP45]] to i64 -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP46]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX24]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = mul nsw i64 1, 
[[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[TMP50]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP51]] to i64 +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX20]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX21]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP52]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX21]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP47]], 1 -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP53]], 1 +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA7]], ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VLA15]], ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP28]] to ptr -// CHECK1-NEXT: store ptr [[TMP55]], ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP57]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP58]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP55]]) +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP58:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VLA12]], ptr [[TMP59]], align 8 +// 
CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP61:%.*]] = inttoptr i64 [[TMP34]] to ptr +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP63]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP64]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP59]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE30:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: store i32 [[ADD27]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP66]], [[TMP67]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE29:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP59]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done30: -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX9]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY31:%.*]] = icmp eq ptr [[ARRAYIDX9]], 
[[TMP62]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY31]], label [[OMP_ARRAYCPY_DONE38:%.*]], label [[OMP_ARRAYCPY_BODY32:%.*]] -// CHECK1: omp.arraycpy.body32: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST33:%.*]] = phi ptr [ [[VLA15]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT36:%.*]], [[OMP_ARRAYCPY_BODY32]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST34:%.*]] = phi ptr [ [[ARRAYIDX9]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT35:%.*]], [[OMP_ARRAYCPY_BODY32]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST33]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT35]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST34]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT36]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST33]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE37:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT35]], [[TMP62]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE37]], label [[OMP_ARRAYCPY_DONE38]], label [[OMP_ARRAYCPY_BODY32]] -// CHECK1: omp.arraycpy.done38: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP57]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX6]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY28:%.*]] = icmp eq ptr [[ARRAYIDX6]], [[TMP68]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY28]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY29:%.*]] +// CHECK1: omp.arraycpy.body29: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST30:%.*]] = phi ptr [ [[VLA12]], [[OMP_ARRAYCPY_DONE27]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[OMP_ARRAYCPY_BODY29]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST31:%.*]] = phi ptr [ [[ARRAYIDX6]], [[OMP_ARRAYCPY_DONE27]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY29]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST31]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST30]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST31]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST31]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST30]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP68]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY29]] +// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP63]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: 
.omp.reduction.case2: -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY39:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP63]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY39]], label [[OMP_ARRAYCPY_DONE46:%.*]], label [[OMP_ARRAYCPY_BODY40:%.*]] -// CHECK1: omp.arraycpy.body40: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST41:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT44:%.*]], [[OMP_ARRAYCPY_BODY40]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST42:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT43:%.*]], [[OMP_ARRAYCPY_BODY40]] ] -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST41]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST42]], i32 [[TMP64]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT43]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST42]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT44]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST41]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE45:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT43]], [[TMP63]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE45]], label [[OMP_ARRAYCPY_DONE46]], label [[OMP_ARRAYCPY_BODY40]] -// CHECK1: omp.arraycpy.done46: -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX9]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY47:%.*]] = icmp eq ptr [[ARRAYIDX9]], [[TMP66]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY47]], label [[OMP_ARRAYCPY_DONE55:%.*]], label [[OMP_ARRAYCPY_BODY48:%.*]] -// CHECK1: omp.arraycpy.body48: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST49:%.*]] = phi ptr [ [[VLA15]], [[OMP_ARRAYCPY_DONE46]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT53:%.*]], [[OMP_ARRAYCPY_BODY48]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST50:%.*]] = phi ptr [ [[ARRAYIDX9]], [[OMP_ARRAYCPY_DONE46]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT52:%.*]], [[OMP_ARRAYCPY_BODY48]] ] -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP68]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL51:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST50]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST49]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST50]], ptr align 4 [[CALL51]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP68]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT52]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST50]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT53]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST49]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE54:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT52]], [[TMP66]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE54]], label [[OMP_ARRAYCPY_DONE55]], label [[OMP_ARRAYCPY_BODY48]] -// CHECK1: omp.arraycpy.done55: +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY36:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP69]] +// CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_ISEMPTY36]], label [[OMP_ARRAYCPY_DONE43:%.*]], label [[OMP_ARRAYCPY_BODY37:%.*]] +// CHECK1: omp.arraycpy.body37: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST38:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY37]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST39:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT40:%.*]], [[OMP_ARRAYCPY_BODY37]] ] +// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST38]], align 4 +// CHECK1-NEXT: [[TMP71:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST39]], i32 [[TMP70]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT40]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST39]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT41]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST38]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE42:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT40]], [[TMP69]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE42]], label [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_BODY37]] +// CHECK1: omp.arraycpy.done43: +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX6]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY44:%.*]] = icmp eq ptr [[ARRAYIDX6]], [[TMP72]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY44]], label [[OMP_ARRAYCPY_DONE52:%.*]], label [[OMP_ARRAYCPY_BODY45:%.*]] +// CHECK1: omp.arraycpy.body45: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST46:%.*]] = phi ptr [ [[VLA12]], [[OMP_ARRAYCPY_DONE43]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT50:%.*]], [[OMP_ARRAYCPY_BODY45]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST47:%.*]] = phi ptr [ [[ARRAYIDX6]], [[OMP_ARRAYCPY_DONE43]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY45]] ] +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP74]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL48:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST47]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST46]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST47]], ptr align 4 [[CALL48]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP74]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT49]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST47]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT50]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST46]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE51:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT49]], [[TMP72]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_DONE52]], label [[OMP_ARRAYCPY_BODY45]] +// CHECK1: omp.arraycpy.done52: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA15]], i64 [[TMP28]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA15]], [[TMP69]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE56:%.*]], label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1-NEXT: [[TMP75:%.*]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[VLA12]], i64 [[TMP34]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA12]], [[TMP75]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE53:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP69]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP75]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA15]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE56]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done56: -// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP70]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA12]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE53]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done53: +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP76]]) // CHECK1-NEXT: ret void // // @@ -1282,14 +1344,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(160) [[ARRS:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1298,196 +1357,198 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARRS4:%.*]] = alloca [10 x [4 x %struct.S]], align 16 +// CHECK1-NEXT: [[ARRS:%.*]] = alloca [10 x [4 x %struct.S]], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], 
ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], [[TMP1]] -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -// CHECK1-NEXT: [[TMP6:%.*]] = udiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA3:%.*]] = alloca i32, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[VLA3]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA3]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +// CHECK1-NEXT: [[TMP11:%.*]] = udiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] 
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS4]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY5:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY5]], label [[OMP_ARRAYINIT_DONE10:%.*]], label [[OMP_ARRAYINIT_BODY6:%.*]] -// CHECK1: omp.arrayinit.body6: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY6]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYINIT_DONE10]], label [[OMP_ARRAYINIT_BODY6]] -// CHECK1: omp.arrayinit.done10: -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY1:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY1]], label [[OMP_ARRAYINIT_DONE6:%.*]], label [[OMP_ARRAYINIT_BODY2:%.*]] +// CHECK1: omp.arrayinit.body2: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY2]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST3]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYINIT_DONE6]], label [[OMP_ARRAYINIT_BODY2]] +// CHECK1: omp.arrayinit.done6: +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA3]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA3]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[ARRS4]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP29]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP34]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP6]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP3]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[ARRS4]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST22]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST21]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST22]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], 
label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[TMP8]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK1: omp.arraycpy.body16: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi ptr [ [[ARRS]], [[OMP_ARRAYCPY_DONE14]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE14]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST17]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST18]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT20]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY16]] +// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY27:%.*]] = icmp eq ptr [[TMP2]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY27]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY28:%.*]] -// CHECK1: omp.arraycpy.body28: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST29:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY28]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST30:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY28]] ] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST29]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST30]], i32 [[TMP36]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST30]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST29]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY28]] -// CHECK1: omp.arraycpy.done34: -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY35:%.*]] = icmp eq ptr [[TMP3]], [[TMP38]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY35]], label [[OMP_ARRAYCPY_DONE43:%.*]], label [[OMP_ARRAYCPY_BODY36:%.*]] -// CHECK1: omp.arraycpy.body36: -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST37:%.*]] = phi ptr [ [[ARRS4]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST38:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT40:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL39:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST38]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST37]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST38]], ptr align 4 [[CALL39]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT40]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT41]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST37]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE42:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT40]], [[TMP38]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE42]], label [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_BODY36]] -// CHECK1: omp.arraycpy.done43: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[TMP6]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE30:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] +// CHECK1: omp.arraycpy.body24: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT28:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT27:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 [[TMP41]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT27]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT28]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE29:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT27]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_BODY24]] +// CHECK1: omp.arraycpy.done30: +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY31:%.*]] = icmp eq ptr [[TMP8]], [[TMP43]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY31]], label [[OMP_ARRAYCPY_DONE39:%.*]], label [[OMP_ARRAYCPY_BODY32:%.*]] +// CHECK1: omp.arraycpy.body32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST33:%.*]] = phi ptr [ [[ARRS]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT37:%.*]], 
[[OMP_ARRAYCPY_BODY32]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST34:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT36:%.*]], [[OMP_ARRAYCPY_BODY32]] ] +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP45]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL35:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST33]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr align 4 [[CALL35]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP45]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT36]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST34]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT37]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST33]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE38:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT36]], [[TMP43]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE38]], label [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_BODY32]] +// CHECK1: omp.arraycpy.done39: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN44:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS4]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN44]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN40:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN40]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP46]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN44]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE45:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done45: -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP42]]) -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP44]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN40]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE41:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done41: +// CHECK1-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP49]]) // CHECK1-NEXT: ret void // // @@ -1543,152 +1604,152 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR6:%.*]] = alloca [1 x [2 x i32]], align 4 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [1 x [2 x i32]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr 
inbounds i32, ptr [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP4]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX4]], i64 1 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], ptr [[ARR6]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP7]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX2]], i64 1 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], ptr [[ARR]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[ARR6]], i64 [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[ARR]], i64 [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX8]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX6]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARR6]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP27]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP31]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP29]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 
[[TMP31]] -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP29]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done15: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP27]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP31]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK1: omp.arraycpy.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[ARR6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 [[TMP33]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK1: omp.arraycpy.done23: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP27]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP36]] +// 
CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK1: omp.arraycpy.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 [[TMP37]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP31]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP40]]) // CHECK1-NEXT: ret void // // @@ -1724,11 +1785,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1743,107 +1804,109 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP9:%.*]] = add nuw i64 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP9]], align 16 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP12]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: 
call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP9]] to ptr -// CHECK1-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP35]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP37]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP37]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP39]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -1852,47 +1915,47 @@ // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP37]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP39]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] // CHECK1: omp.arraycpy.body13: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr 
[[TMP41]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP38]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] // CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP43]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done21: -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP42]]) -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP44]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP44]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP46]]) // CHECK1-NEXT: ret void // // @@ -1929,160 +1992,162 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
ptr, ptr [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR24]], i64 [[TMP11]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR2]], i64 [[TMP13]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR24]], ptr [[TMP24]], align 8 -// 
CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP28]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP28]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done12: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done11: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP29]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] -// CHECK1: omp.arraycpy.body14: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST15]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr align 4 [[CALL17]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP29]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY14]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] +// CHECK1: 
omp.arraycpy.body13: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN22:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN22]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 6 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN22]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE23:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done23: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] +// CHECK1-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done22: +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP36]]) // CHECK1-NEXT: ret void // // @@ -2116,160 +2181,162 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 -// CHECK1-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR24]], i64 [[TMP11]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR2]], i64 [[TMP13]] +// CHECK1-NEXT: store ptr 
[[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR24]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP28]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr 
@_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP28]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done12: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done11: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP29]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] -// CHECK1: omp.arraycpy.body14: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST15]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr align 4 [[CALL17]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP29]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY14]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] +// CHECK1: omp.arraycpy.body13: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN22:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN22]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 6 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN22]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE23:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done23: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr 
[[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done22: +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP36]]) // CHECK1-NEXT: ret void // // @@ -2303,111 +2370,113 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR24]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR24]], i64 [[TMP10]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR2]], i64 [[TMP12]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR24]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP13]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP15]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[CALL:%.*]] = call 
noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[CALL9]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[CALL8]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP13]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP15]]) // CHECK1-NEXT: ret void // // @@ -2430,148 +2499,150 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(20) [[VVAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VVAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VVAR22:%.*]] = alloca [5 x %struct.S], align 16 +// CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[VVAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VVAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP2]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr [[STRUCT_S]], ptr [[VVAR22]], i64 [[TMP5]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], ptr [[VVAR2]], i64 [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VVAR22]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP20]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP22]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP22]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST5:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST5]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST5]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST4:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST4]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST4]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST4]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE7:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP22]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done8: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done7: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY9:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY9]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY10:%.*]] -// CHECK1: omp.arraycpy.body10: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY10]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY10]] ] -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP25]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST12]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST11]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST12]], ptr align 4 [[CALL13]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr 
@[[GLOB3]], i32 [[TMP25]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT15]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY10]] -// CHECK1: omp.arraycpy.done17: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] +// CHECK1: omp.arraycpy.body9: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY9]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY9]] ] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST11]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST10]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST11]], ptr align 4 [[CALL12]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT14]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY9]] +// CHECK1: omp.arraycpy.done16: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN18:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN18]], i64 5 +// CHECK1-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN17]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN18]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE19:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done19: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP28]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done18: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP30]]) // CHECK1-NEXT: ret void // // @@ -2605,11 +2676,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2618,145 +2689,147 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR34]], i64 [[TMP9]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] 
= load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR3]], i64 [[TMP11]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], 
ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR34]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP26]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP26]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] -// CHECK1: omp.arraycpy.body13: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] -// CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// CHECK1: omp.arraycpy.body12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] +// CHECK1: omp.arraycpy.done19: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN20]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// 
CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done22: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP32]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done21: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -2790,11 +2863,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2803,145 +2876,147 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR34]], i64 [[TMP9]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = 
getelementptr [[STRUCT_S]], ptr [[VAR3]], i64 [[TMP11]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR34]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP26]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr 
@_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP26]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] -// CHECK1: omp.arraycpy.body13: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] -// CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// CHECK1: omp.arraycpy.body12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] +// CHECK1: omp.arraycpy.done19: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN20]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done22: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP32]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done21: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -2975,11 +3050,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2995,104 +3070,106 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 -// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// 
CHECK1-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP8]], align 16 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP10:%.*]] = add nuw i64 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP10]], align 16 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP16]] -// CHECK1-NEXT: store ptr [[TMP17]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp 
sgt i32 [[TMP20]], 9 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP18]] +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP33]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK1-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP35]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP35]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -3101,47 +3178,47 @@ // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP35]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP37]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done10: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP33]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP38]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] // CHECK1: omp.arraycpy.body12: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP38]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP38]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP38]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] // CHECK1: omp.arraycpy.done19: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP33]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: 
.omp.reduction.default: -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP39]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP10]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP41]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE20:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done20: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP40]]) -// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP42]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP42]]) +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP44]]) // CHECK1-NEXT: ret void // // @@ -3178,11 +3255,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3191,136 +3268,138 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR33:%.*]] = alloca [4 x %struct.S], align 16 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [4 x %struct.S], align 16 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR33]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: store ptr [[VAR33]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR33]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 
@__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP17]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.24, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP19]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.24, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP19]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP21]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR33]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP19]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done10: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP17]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE8:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP21]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done9: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP2]], [[TMP20]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// CHECK1: omp.arraycpy.body12: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VAR33]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP22]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP22]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP20]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] -// CHECK1: omp.arraycpy.done19: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP17]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY10:%.*]] = icmp eq ptr [[TMP4]], [[TMP22]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY10]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY11:%.*]] +// CHECK1: omp.arraycpy.body11: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY11]] ] +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY11]] ] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST13]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST12]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST13]], ptr align 4 [[CALL14]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP22]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY11]] +// CHECK1: omp.arraycpy.done18: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR33]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN20]], i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN19]], i64 4 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done21: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP25]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN19]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE20:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done20: +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr 
[[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP27]]) // CHECK1-NEXT: ret void // // @@ -3358,64 +3437,97 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.12], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S.0], align 16 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S.12], align 16 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiLi42EET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN]], i64 42 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label 
[[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..25, ptr [[T_VAR]], ptr [[TMP0]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..29, ptr [[ARR]], ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[TMP2]]) +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..25, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN1:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN1]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN3]], i64 42 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN1]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN3]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY4:%.*]] -// CHECK1: arraydestroy.body4: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST5:%.*]] = phi ptr [ [[TMP4]], [[ARRAYDESTROY_DONE2]] ], [ [[ARRAYDESTROY_ELEMENT6:%.*]], [[ARRAYDESTROY_BODY4]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT6]] = getelementptr inbounds 
[[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST5]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT6]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE7:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT6]], [[ARRAY_BEGIN3]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE7]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY4]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY6:%.*]] +// CHECK1: arraydestroy.body6: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST7:%.*]] = phi ptr [ [[TMP19]], [[ARRAYDESTROY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT8:%.*]], [[ARRAYDESTROY_BODY6]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT8]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYDESTROY_ELEMENTPAST7]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT8]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE9:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT8]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE9]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY6]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP20]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -3482,16 +3594,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..25 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3500,169 +3607,171 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca 
ptr, align 8 -// CHECK1-NEXT: [[VAR16:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP22:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[REF_TMP18:%.*]] = alloca [[STRUCT_S_12]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: 
store i32 0, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[TMP12]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP25]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR16]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR17]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP9]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, 
ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP16]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL13]], 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL9]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = icmp ne i32 [[CALL14]], 0 +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[CALL10]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP28:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL15]], [[LAND_RHS]] ] 
-// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP28]] to i32 +// CHECK1-NEXT: [[TMP35:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL11]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP35]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP16:%.*]] = icmp slt i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] -// CHECK1: cond.true17: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: br label [[COND_END19:%.*]] -// CHECK1: cond.false18: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END19]] -// CHECK1: cond.end19: -// CHECK1-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE17]] ], [ [[TMP32]], [[COND_FALSE18]] ] -// CHECK1-NEXT: store i32 [[COND20]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE13:%.*]], label [[COND_FALSE14:%.*]] +// CHECK1: cond.true13: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: br label [[COND_END15:%.*]] +// CHECK1: cond.false14: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END15]] +// CHECK1: cond.end15: +// CHECK1-NEXT: [[COND16:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE13]] ], [ [[TMP39]], [[COND_FALSE14]] ] +// CHECK1-NEXT: store i32 [[COND16]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP33]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL21:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL21]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL23:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 
[[CALL23]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL24]], label [[LAND_RHS25:%.*]], label [[LAND_END28:%.*]] -// CHECK1: land.rhs25: -// CHECK1-NEXT: [[CALL26:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[CALL26]], 0 -// CHECK1-NEXT: br label [[LAND_END28]] -// CHECK1: land.end28: -// CHECK1-NEXT: [[TMP35:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL27]], [[LAND_RHS25]] ] -// CHECK1-NEXT: [[CONV29:%.*]] = zext i1 [[TMP35]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP22]], i32 noundef [[CONV29]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP22]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP22]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw min ptr [[TMP3]], i32 [[TMP36]] monotonic, align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP40]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL17]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL19:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL20:%.*]] = icmp ne i32 [[CALL19]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL20]], label [[LAND_RHS21:%.*]], label [[LAND_END24:%.*]] +// CHECK1: land.rhs21: +// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL23:%.*]] = icmp ne i32 [[CALL22]], 0 +// CHECK1-NEXT: br label [[LAND_END24]] +// CHECK1: land.end24: +// CHECK1-NEXT: [[TMP42:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL23]], [[LAND_RHS21]] ] +// CHECK1-NEXT: [[CONV25:%.*]] = zext i1 [[TMP42]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP18]], i32 noundef [[CONV25]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP18]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP18]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = atomicrmw min ptr [[TMP8]], i32 [[TMP43]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR16]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -3671,7 +3780,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 @@ -3758,14 +3867,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3774,128 +3880,130 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP9:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr 
[[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 1, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[TMP6]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.28, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP11]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.28, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL7:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL8]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP22]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP27:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP27]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP11]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL9:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP24:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[LAND_END14:%.*]] ] -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[_TMP10]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[_TMP10]], align 4 -// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL11]], label [[LAND_RHS12:%.*]], label [[LAND_END14]] -// CHECK1: land.rhs12: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br label [[LAND_END14]] -// CHECK1: land.end14: -// CHECK1-NEXT: [[TMP27:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL13]], [[LAND_RHS12]] ] -// CHECK1-NEXT: [[CONV15:%.*]] = zext i1 [[TMP27]] to i32 -// CHECK1-NEXT: store i32 [[CONV15]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP24]], i32 [[TMP28]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP30]] = extractvalue { i32, i1 } [[TMP29]], 0 -// CHECK1-NEXT: [[TMP31:%.*]] = extractvalue { i32, i1 } [[TMP29]], 1 -// CHECK1-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP29:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP35:%.*]], [[LAND_END13:%.*]] ] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[_TMP9]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[_TMP9]], align 4 +// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL10]], label [[LAND_RHS11:%.*]], label [[LAND_END13]] +// CHECK1: land.rhs11: +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL12:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br label [[LAND_END13]] +// CHECK1: land.end13: +// CHECK1-NEXT: [[TMP32:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL12]], [[LAND_RHS11]] ] +// CHECK1-NEXT: [[CONV14:%.*]] = zext i1 [[TMP32]] to i32 +// CHECK1-NEXT: store i32 [[CONV14]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP29]], i32 [[TMP33]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP35]] = extractvalue { i32, i1 } [[TMP34]], 0 +// CHECK1-NEXT: [[TMP36:%.*]] = extractvalue { i32, i1 } [[TMP34]], 1 +// CHECK1-NEXT: br i1 [[TMP36]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP11]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP11]]) // CHECK1-NEXT: ret void // // @@ -3927,15 +4035,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(168) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3944,168 +4048,170 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR4:%.*]] = alloca [40 x %struct.S.0], align 16 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [40 x %struct.S.12], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[REF_TMP20:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP19:%.*]] = alloca [[STRUCT_S_12]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 40 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.0], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] 
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[TMP2]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARR4]], i64 [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr ([[STRUCT_S_12]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARR]], i64 [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP3]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP22]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX5]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[TMP8]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARR4]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP35]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST10]]) -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CALL]], [[CALL11]] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD12]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST10]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]]) +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CALL]], [[CALL10]] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD11]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done15: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK1: omp.arraycpy.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL21:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST19]]) -// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[CALL21]], [[CALL22]] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP20]], i32 noundef [[ADD23]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST19]], ptr align 4 [[REF_TMP20]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP20]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK1: omp.arraycpy.done27: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK1: omp.arraycpy.body16: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL20:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]]) +// CHECK1-NEXT: [[CALL21:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST17]]) +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CALL20]], [[CALL21]] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]], i32 noundef [[ADD22]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST18]], ptr align 4 [[REF_TMP19]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_12]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY16]] +// CHECK1: omp.arraycpy.done26: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [40 x %struct.S.0], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN28]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [40 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN27]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done29: -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP37]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done28: +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -4114,7 +4220,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 @@ -4123,7 +4229,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr 
[[STRUCT_S_0]], ptr [[TMP7]], i64 40 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[TMP7]], i64 40 // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: @@ -4135,8 +4241,8 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: @@ -4149,7 +4255,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_12:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile double, ptr @g, align 8 // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[F]], align 4 @@ -4164,7 +4270,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_12:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double // CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr @g, align 8 @@ -4194,10 +4300,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4213,93 +4320,95 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store double 0.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr @g1, align 8 // CHECK3-NEXT: store double 0.000000e+00, ptr [[G1]], align 8 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load double, ptr @g, align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr @g, align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP19]], [[TMP20]] // CHECK3-NEXT: store double [[ADD5]], ptr @g, align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[G1]], align 8 -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store double [[ADD6]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G1]], align 8 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store double [[ADD6]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw fadd ptr @g, double [[TMP22]] monotonic, align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G1]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw fadd ptr [[TMP1]], double [[TMP24]] monotonic, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw fadd ptr @g, double [[TMP23]] monotonic, align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[G1]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw fadd ptr [[TMP2]], double [[TMP25]] monotonic, align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -4346,17 +4455,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: 
[[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4372,46 +4483,48 @@ // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store double 0.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr @g1, align 8 // CHECK4-NEXT: store double 0.000000e+00, ptr [[G1]], align 8 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 1 @@ -4423,55 +4536,55 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP11:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP11]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP12]], ptr [[BLOCK_CAPTURED4]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr 
[[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK4-NEXT: call void [[TMP14]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP13]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[G1]], ptr [[TMP17]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[G]], ptr [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: store ptr [[G1]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP19:%.*]] = load double, ptr @g, align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP19]], [[TMP20]] +// CHECK4-NEXT: [[TMP20:%.*]] = load double, ptr @g, align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP20]], [[TMP21]] // CHECK4-NEXT: store double [[ADD6]], ptr @g, align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK4-NEXT: [[TMP22:%.*]] = load double, ptr [[G1]], align 8 -// CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP21]], [[TMP22]] -// CHECK4-NEXT: store double [[ADD7]], ptr [[TMP1]], align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], 
i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load double, ptr [[G1]], align 8 +// CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP23]] +// CHECK4-NEXT: store double [[ADD7]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP24:%.*]] = atomicrmw fadd ptr @g, double [[TMP23]] monotonic, align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = load double, ptr [[G1]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = atomicrmw fadd ptr [[TMP1]], double [[TMP25]] monotonic, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP24:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = atomicrmw fadd ptr @g, double [[TMP24]] monotonic, align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load double, ptr [[G1]], align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr [[TMP2]], double [[TMP26]] monotonic, align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -479,8 +479,11 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[S]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[S]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // @@ -496,45 +499,47 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[S:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[S1:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @.omp_initializer.(ptr noundef [[S1]], ptr noundef [[TMP0]]) +// CHECK1-NEXT: call void @.omp_initializer.(ptr noundef [[S]], ptr noundef [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK1-NEXT: call void @.omp_combiner.(ptr noundef [[TMP0]], ptr noundef [[S1]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @.omp_combiner.(ptr noundef [[TMP2]], ptr noundef [[S]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -616,8 +621,15 @@ // CHECK1-NEXT: [[VAR2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S.0], align 16 // CHECK1-NEXT: [[VAR3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_10:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_12:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_13:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_14:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_15:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 @@ -655,62 +667,100 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE8]], label [[ARRAYCTOR_CONT9:%.*]], label [[ARRAYCTOR_LOOP5]] // CHECK1: arrayctor.cont9: // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[VAR3]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 6, ptr @.omp_outlined..1, ptr [[T_VAR]], ptr [[TMP0]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 10, [[TMP2]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP4]], align 16 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..9, i64 10, i64 [[TMP2]], ptr [[VLA]], ptr [[VEC]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..13, i64 10, i64 [[TMP2]], ptr [[VLA]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..15, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[VVAR2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..19, ptr [[TMP5]]) -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..21, ptr [[TMP6]]) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() -// CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 10, [[TMP8]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP10]], align 16 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_10]]) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_12]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_13]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_13]]) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_14]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_14]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_15]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..21, ptr [[OMP_OUTLINED_ARG_AGG_15]]) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() +// CHECK1-NEXT: store i32 [[CALL16]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP26]]) +// CHECK1-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN17]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP8]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 40 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY14:%.*]] -// CHECK1: arraydestroy.body14: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST15:%.*]] = phi ptr [ [[TMP9]], [[ARRAYDESTROY_DONE12]] ], [ [[ARRAYDESTROY_ELEMENT16:%.*]], [[ARRAYDESTROY_BODY14]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT16]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST15]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT16]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE17:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT16]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE17]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY14]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done18: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN19]], i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN19]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY20:%.*]] // CHECK1: arraydestroy.body20: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi ptr [ [[TMP28]], [[ARRAYDESTROY_DONE18]] ], [ 
[[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT22]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST21]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT22]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE23:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT22]], [[ARRAY_BEGIN19]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE23]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY20]] // CHECK1: arraydestroy.done24: +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN25]], i64 4 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY26:%.*]] +// CHECK1: arraydestroy.body26: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST27:%.*]] = phi ptr [ [[TMP29]], [[ARRAYDESTROY_DONE24]] ], [ [[ARRAYDESTROY_ELEMENT28:%.*]], [[ARRAYDESTROY_BODY26]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT28]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST27]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT28]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE29:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT28]], [[ARRAY_BEGIN25]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE29]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY26]] +// CHECK1: arraydestroy.done30: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP11]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP30]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -737,16 +787,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(48) [[S_ARR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -755,138 +800,140 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR17:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR18:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call 
void @.omp_initializer..3(ptr noundef [[T_VAR3]], ptr noundef [[TMP0]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: call void @.omp_initializer..7(ptr noundef [[VAR17]], ptr noundef [[TMP2]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @.init, align 4 -// CHECK1-NEXT: store float [[TMP8]], ptr [[T_VAR18]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: call void @.omp_initializer..3(ptr noundef [[T_VAR]], ptr noundef [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: call void @.omp_initializer..7(ptr noundef [[VAR1]], ptr noundef [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr @.init, align 4 +// CHECK1-NEXT: store float [[TMP15]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP9]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP19]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP12]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX7]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP26]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 
x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR17]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR18]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP26]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP17]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR13]], ptr noundef [[ADD_PTR14]]) -// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR9]], ptr noundef [[ADD_PTR10]]) +// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP17]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR15]], ptr noundef [[ADD_PTR16]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR11]], ptr noundef [[ADD_PTR12]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP17]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) 
[[VAR17]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR4]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -1037,15 +1084,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(480) [[ARRS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1059,240 +1102,242 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] -// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA7:%.*]] = alloca i32, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA7]], [[TMP16]] +// CHECK1-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX3]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = add nuw i64 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP19]], align 16 +// CHECK1-NEXT: store i64 
[[TMP19]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @.omp_initializer..11(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX9]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN12:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN12]] -// CHECK1-NEXT: [[ARRAYDECAY14:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX13]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDECAY14]], i64 2 -// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX15]] to i64 -// CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 -// CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = sdiv exact i64 [[TMP26]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] 
= mul nuw i64 [[TMP28]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[VLA16:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP28]], align 16 -// CHECK1-NEXT: store i64 [[TMP28]], ptr [[__VLA_EXPR1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA16]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY17:%.*]] = icmp eq ptr [[VLA16]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY17]], label [[OMP_ARRAYINIT_DONE25:%.*]], label [[OMP_ARRAYINIT_BODY18:%.*]] -// CHECK1: omp.arrayinit.body18: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX10]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[VLA16]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i64 4 -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR21]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYINIT_DONE25]], label [[OMP_ARRAYINIT_BODY18]] -// CHECK1: omp.arrayinit.done25: -// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK1-NEXT: [[TMP32:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 -// CHECK1-NEXT: [[TMP33:%.*]] = sub i64 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: [[TMP34:%.*]] = sdiv exact i64 [[TMP33]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA16]], i64 [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP37]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP38]], 9 +// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP25:%.*]] = sub i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = sdiv exact i64 [[TMP25]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP26]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX6]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] 
to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 0, [[TMP29]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP10]], i64 0, i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYDECAY11:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX10]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDECAY11]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 +// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP32:%.*]] = sub i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = sdiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP34:%.*]] = add nuw i64 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[VLA13:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP34]], align 16 +// CHECK1-NEXT: store i64 [[TMP34]], ptr [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA13]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY14:%.*]] = icmp eq ptr [[VLA13]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY14]], label [[OMP_ARRAYINIT_DONE22:%.*]], label [[OMP_ARRAYINIT_BODY15:%.*]] +// CHECK1: omp.arrayinit.body15: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX7]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYINIT_BODY15]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[VLA13]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYINIT_BODY15]] ] +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i64 4 +// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR18]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYINIT_DONE22]], label [[OMP_ARRAYINIT_BODY15]] +// CHECK1: omp.arrayinit.done22: +// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA13]], i64 [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP43]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP44]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP39]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP45]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP40]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP26:%.*]] = icmp sle i32 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: br i1 [[CMP26]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP49]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP44]] -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP45]] to i64 -// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX27]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARRAYIDX28]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP46]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX28]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[TMP50]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP51]] to i64 +// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX24]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX25]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP52]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX25]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP47]], 1 -// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[ADD26:%.*]] = add nsw i32 [[TMP53]], 1 +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA7]], ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VLA16]], ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP28]] to ptr -// CHECK1-NEXT: store ptr [[TMP55]], ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP57]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP58]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP55]]) +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP58:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VLA13]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP61:%.*]] = inttoptr i64 [[TMP34]] to ptr +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP63]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP64]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP59]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST30:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST31:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST31]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST30]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST31]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST30]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP59]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done34: -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX10]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY35:%.*]] = icmp eq ptr [[ARRAYIDX10]], [[TMP60]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY35]], label [[OMP_ARRAYCPY_DONE44:%.*]], label [[OMP_ARRAYCPY_BODY36:%.*]] -// CHECK1: omp.arraycpy.body36: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST37:%.*]] = phi ptr [ [[VLA16]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT42:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST38:%.*]] = phi ptr [ [[ARRAYIDX10]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[ADD_PTR39:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i64 4 -// CHECK1-NEXT: [[ADD_PTR40:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST37]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR39]], ptr noundef [[ADD_PTR40]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT41]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT42]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST37]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE43:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT41]], [[TMP60]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_DONE44]], label [[OMP_ARRAYCPY_BODY36]] -// CHECK1: omp.arraycpy.done44: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP57]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST27:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST28:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST28]], ptr noundef 
[[OMP_ARRAYCPY_SRCELEMENTPAST27]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST28]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST27]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done31: +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX7]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY32:%.*]] = icmp eq ptr [[ARRAYIDX7]], [[TMP66]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY32]], label [[OMP_ARRAYCPY_DONE41:%.*]], label [[OMP_ARRAYCPY_BODY33:%.*]] +// CHECK1: omp.arraycpy.body33: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST34:%.*]] = phi ptr [ [[VLA13]], [[OMP_ARRAYCPY_DONE31]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT39:%.*]], [[OMP_ARRAYCPY_BODY33]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST35:%.*]] = phi ptr [ [[ARRAYIDX7]], [[OMP_ARRAYCPY_DONE31]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY33]] ] +// CHECK1-NEXT: [[ADD_PTR36:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST35]], i64 4 +// CHECK1-NEXT: [[ADD_PTR37:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST34]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR36]], ptr noundef [[ADD_PTR37]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT38]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST35]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT39]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST34]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE40:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT38]], [[TMP66]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_DONE41]], label [[OMP_ARRAYCPY_BODY33]] +// CHECK1: omp.arraycpy.done41: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP63]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY45:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP61]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY45]], label [[OMP_ARRAYCPY_DONE52:%.*]], label [[OMP_ARRAYCPY_BODY46:%.*]] -// CHECK1: omp.arraycpy.body46: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST47:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT50:%.*]], [[OMP_ARRAYCPY_BODY46]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST48:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY46]] ] -// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP63]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST48]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST47]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP63]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT49]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST48]], i32 1 -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT50]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST47]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE51:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT49]], [[TMP61]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_DONE52]], label [[OMP_ARRAYCPY_BODY46]] -// CHECK1: omp.arraycpy.done52: -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX10]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY53:%.*]] = icmp eq ptr [[ARRAYIDX10]], [[TMP64]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY53]], label [[OMP_ARRAYCPY_DONE62:%.*]], label [[OMP_ARRAYCPY_BODY54:%.*]] -// CHECK1: omp.arraycpy.body54: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST55:%.*]] = phi ptr [ [[VLA16]], [[OMP_ARRAYCPY_DONE52]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT60:%.*]], [[OMP_ARRAYCPY_BODY54]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST56:%.*]] = phi ptr [ [[ARRAYIDX10]], [[OMP_ARRAYCPY_DONE52]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT59:%.*]], [[OMP_ARRAYCPY_BODY54]] ] -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP66]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR57:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST56]], i64 4 -// CHECK1-NEXT: [[ADD_PTR58:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST55]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR57]], ptr noundef [[ADD_PTR58]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP66]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT59]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST56]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT60]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST55]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE61:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT59]], [[TMP64]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE61]], label [[OMP_ARRAYCPY_DONE62]], label [[OMP_ARRAYCPY_BODY54]] -// CHECK1: omp.arraycpy.done62: +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY42:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP67]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY42]], label [[OMP_ARRAYCPY_DONE49:%.*]], label [[OMP_ARRAYCPY_BODY43:%.*]] +// CHECK1: omp.arraycpy.body43: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST44:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST45:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT46:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP69]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST45]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST44]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP69]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT46]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST45]], 
i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT47]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST44]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE48:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT46]], [[TMP67]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE48]], label [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_BODY43]] +// CHECK1: omp.arraycpy.done49: +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX7]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY50:%.*]] = icmp eq ptr [[ARRAYIDX7]], [[TMP70]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY50]], label [[OMP_ARRAYCPY_DONE59:%.*]], label [[OMP_ARRAYCPY_BODY51:%.*]] +// CHECK1: omp.arraycpy.body51: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST52:%.*]] = phi ptr [ [[VLA13]], [[OMP_ARRAYCPY_DONE49]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT57:%.*]], [[OMP_ARRAYCPY_BODY51]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST53:%.*]] = phi ptr [ [[ARRAYIDX7]], [[OMP_ARRAYCPY_DONE49]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT56:%.*]], [[OMP_ARRAYCPY_BODY51]] ] +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP72]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST53]], i64 4 +// CHECK1-NEXT: [[ADD_PTR55:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST52]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR54]], ptr noundef [[ADD_PTR55]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP72]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT56]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST53]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT57]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST52]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE58:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT56]], [[TMP70]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE58]], label [[OMP_ARRAYCPY_DONE59]], label [[OMP_ARRAYCPY_BODY51]] +// CHECK1: omp.arraycpy.done59: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA16]], i64 [[TMP28]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA16]], [[TMP67]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE63:%.*]], label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA13]], i64 [[TMP34]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA13]], [[TMP73]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE60:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP67]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP73]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// 
CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA16]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE63]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done63: -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP68]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA13]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE60]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done60: +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP74]]) // CHECK1-NEXT: ret void // // @@ -1377,14 +1422,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(480) [[ARRS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1393,204 +1435,206 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARRS5:%.*]] = alloca [10 x [4 x %struct.S.0]], align 16 +// CHECK1-NEXT: [[ARRS:%.*]] = alloca [10 x [4 x %struct.S.0]], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// 
CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], [[TMP1]] -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -// CHECK1-NEXT: [[TMP6:%.*]] = udiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA3:%.*]] = alloca i32, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[VLA3]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA3]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +// CHECK1-NEXT: [[TMP11:%.*]] = udiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT1:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @.omp_initializer..11(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT1]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT1]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS5]], i32 0, i32 0, i32 
0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY6:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY6]], label [[OMP_ARRAYINIT_DONE14:%.*]], label [[OMP_ARRAYINIT_BODY7:%.*]] -// CHECK1: omp.arrayinit.body7: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYINIT_BODY7]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYINIT_BODY7]] ] -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i64 4 -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR10]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYINIT_DONE14]], label [[OMP_ARRAYINIT_BODY7]] -// CHECK1: omp.arrayinit.done14: -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY2:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY2]], label [[OMP_ARRAYINIT_DONE10:%.*]], label [[OMP_ARRAYINIT_BODY3:%.*]] +// CHECK1: omp.arrayinit.body3: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST4:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYINIT_BODY3]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST5:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY3]] ] +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i64 4 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST4]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR6]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST4]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYINIT_DONE10]], label [[OMP_ARRAYINIT_BODY3]] +// CHECK1: 
omp.arrayinit.done10: +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP15:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA3]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX16]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[ARRAYIDX12]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX12]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA3]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[ARRS5]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP29]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP34]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: 
[[TMP31:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP6]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST19]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[TMP3]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[ARRS5]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY24]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY24]] ] -// CHECK1-NEXT: [[ADD_PTR27:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i64 4 -// CHECK1-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR27]], ptr noundef [[ADD_PTR28]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done32: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef 
[[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done18: +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP8]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// CHECK1: omp.arraycpy.body20: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[ARRS]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i64 4 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR23]], ptr noundef [[ADD_PTR24]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] +// CHECK1: omp.arraycpy.done28: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY33:%.*]] = icmp eq ptr [[TMP2]], [[TMP33]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY33]], label [[OMP_ARRAYCPY_DONE40:%.*]], label [[OMP_ARRAYCPY_BODY34:%.*]] -// CHECK1: omp.arraycpy.body34: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST35:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY34]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST36:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT37:%.*]], [[OMP_ARRAYCPY_BODY34]] ] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP35]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST36]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST35]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP35]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT37]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT38]] = 
getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST35]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE39:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT37]], [[TMP33]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_BODY34]] -// CHECK1: omp.arraycpy.done40: -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY41:%.*]] = icmp eq ptr [[TMP3]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY41]], label [[OMP_ARRAYCPY_DONE50:%.*]], label [[OMP_ARRAYCPY_BODY42:%.*]] -// CHECK1: omp.arraycpy.body42: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST43:%.*]] = phi ptr [ [[ARRS5]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT48:%.*]], [[OMP_ARRAYCPY_BODY42]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST44:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY42]] ] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP38]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR45:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST44]], i64 4 -// CHECK1-NEXT: [[ADD_PTR46:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST43]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR45]], ptr noundef [[ADD_PTR46]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP38]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT47]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST44]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT48]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST43]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE49:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT47]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_DONE50]], label [[OMP_ARRAYCPY_BODY42]] -// CHECK1: omp.arraycpy.done50: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY29:%.*]] = icmp eq ptr [[TMP6]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY29]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY30:%.*]] +// CHECK1: omp.arraycpy.body30: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST31:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[OMP_ARRAYCPY_BODY30]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST32:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[OMP_ARRAYCPY_BODY30]] ] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST32]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST31]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr 
i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST32]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST31]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY30]] +// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY37:%.*]] = icmp eq ptr [[TMP8]], [[TMP41]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY37]], label [[OMP_ARRAYCPY_DONE46:%.*]], label [[OMP_ARRAYCPY_BODY38:%.*]] +// CHECK1: omp.arraycpy.body38: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST39:%.*]] = phi ptr [ [[ARRS]], [[OMP_ARRAYCPY_DONE36]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT44:%.*]], [[OMP_ARRAYCPY_BODY38]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST40:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE36]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT43:%.*]], [[OMP_ARRAYCPY_BODY38]] ] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP43]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR41:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST40]], i64 4 +// CHECK1-NEXT: [[ADD_PTR42:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST39]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR41]], ptr noundef [[ADD_PTR42]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP43]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT43]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST40]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT44]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST39]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE45:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT43]], [[TMP41]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE45]], label [[OMP_ARRAYCPY_DONE46]], label [[OMP_ARRAYCPY_BODY38]] +// CHECK1: omp.arraycpy.done46: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN51:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN51]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN47:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN47]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN51]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE52:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done52: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP40]]) -// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP42]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN47]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE48:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done48: +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP45]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP47]]) // CHECK1-NEXT: ret void // // @@ -1644,11 +1688,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1663,32 +1707,34 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4 -// CHECK1-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP9:%.*]] = add nuw i64 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP9]], align 16 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP12]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] @@ -1698,76 +1744,76 @@ // CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR4]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP14]] // CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP7]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP7]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP30]]) -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP9]] to ptr -// CHECK1-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP35]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP37]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label 
[[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP37]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP39]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -1777,48 +1823,48 @@ // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR12]], ptr noundef [[ADD_PTR13]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP37]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP39]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done16: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] // CHECK1: omp.arraycpy.body18: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i64 4 // CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR21]], ptr noundef 
[[ADD_PTR22]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP38]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] // CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP43]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done27: -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP42]]) -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP44]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP44]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP46]]) // CHECK1-NEXT: ret void // // @@ -1856,154 +1902,156 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(60) [[VVAR2:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VVAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VVAR22:%.*]] = alloca [5 x %struct.S.0], align 16 +// CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S.0], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[VVAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VVAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP2]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT3:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 -// 
CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) +// CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP1]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT3]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT3]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VVAR22]], i64 [[TMP5]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VVAR2]], i64 [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VVAR22]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP20]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP22]], i32 1, i64 8, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP22]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i64 4 -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR9]], ptr noundef [[ADD_PTR10]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP22]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done13: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST6:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i64 4 +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST6]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR8]], ptr noundef [[ADD_PTR9]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST6]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP24]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] -// CHECK1: omp.arraycpy.body15: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP25]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i64 4 -// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR18]], ptr noundef [[ADD_PTR19]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP25]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY15]] -// CHECK1: omp.arraycpy.done23: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK1: omp.arraycpy.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR17:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i64 4 +// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR17]], ptr noundef [[ADD_PTR18]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT20]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN24:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN24]], i64 5 +// CHECK1-NEXT: [[ARRAY_BEGIN23:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN23]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN24]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE25:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done25: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP28]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN23]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done24: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP30]]) // CHECK1-NEXT: ret void // // @@ -2038,11 +2086,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(48) [[VAR3:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2051,151 +2099,153 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S.0], align 16 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S.0], align 16 +// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP3]], i64 0, i64 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP5]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], 
[[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) +// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR4]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VAR34]], i64 [[TMP9]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VAR3]], i64 [[TMP11]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR34]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP26]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i64 4 -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR12]], ptr noundef [[ADD_PTR13]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP26]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done16: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i64 4 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR11]], ptr noundef [[ADD_PTR12]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done15: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] -// CHECK1: omp.arraycpy.body18: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i64 4 -// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR21]], ptr noundef [[ADD_PTR22]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] -// CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] +// CHECK1: omp.arraycpy.body17: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i64 4 +// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST18]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR20]], ptr noundef [[ADD_PTR21]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY17]] +// CHECK1: omp.arraycpy.done25: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN27]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN26:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN26]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done28: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP32]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN26]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done27: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -2230,11 +2280,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(48) [[VAR3:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2248,130 +2298,132 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i64 48, ptr inttoptr (i64 6 to ptr)) +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP6]], i64 48, ptr inttoptr (i64 6 to ptr)) // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[DOTVAR3__VOID_ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 // CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 // CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: store ptr [[DOTVAR3__VOID_ADDR]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[DOTVAR3__VOID_ADDR]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[DOTVAR3__VOID_ADDR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP15]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi ptr [ [[DOTVAR3__VOID_ADDR]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i64 4 // CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR10]], ptr noundef [[ADD_PTR11]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], 
label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[TMP2]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[TMP4]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] // CHECK1: omp.arraycpy.body16: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi ptr [ [[DOTVAR3__VOID_ADDR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY16]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP6]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i64 4 // CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR19]], ptr noundef [[ADD_PTR20]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP6]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY16]] // CHECK1: omp.arraycpy.done24: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[DOTVAR3__VOID_ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN25]], i64 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN25]], i64 4 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: 
arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN25]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE26:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done26: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTVAR3__VOID_ADDR]], ptr inttoptr (i64 6 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP6]], ptr [[DOTVAR3__VOID_ADDR]], ptr inttoptr (i64 6 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -2418,6 +2470,9 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S], align 16 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiLi42EET_v.vec, i64 8, i1 false) @@ -2437,37 +2492,67 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 6, ptr @.omp_outlined..23, ptr [[T_VAR]], ptr [[TMP0]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..31, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..35, ptr [[ARR]], ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[TMP2]]) +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN1:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN1]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN3]], i64 42 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN1]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN3]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY4:%.*]] -// CHECK1: arraydestroy.body4: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST5:%.*]] = phi ptr [ [[TMP4]], [[ARRAYDESTROY_DONE2]] ], [ [[ARRAYDESTROY_ELEMENT6:%.*]], [[ARRAYDESTROY_BODY4]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT6]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST5]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT6]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE7:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT6]], [[ARRAY_BEGIN3]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE7]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY4]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY6:%.*]] +// CHECK1: arraydestroy.body6: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST7:%.*]] = phi ptr [ [[TMP19]], [[ARRAYDESTROY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT8:%.*]], [[ARRAYDESTROY_BODY6]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT8]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST7]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT8]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE9:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT8]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 
[[ARRAYDESTROY_DONE9]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY6]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP20]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -2526,16 +2611,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2544,134 +2624,136 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR17:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR18:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] 
= load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @.omp_initializer..25(ptr noundef [[T_VAR3]], ptr noundef [[TMP0]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: call void @.omp_initializer..27(ptr noundef [[VAR17]], ptr noundef [[TMP2]]) -// CHECK1-NEXT: call void @.omp_initializer..29(ptr noundef [[T_VAR18]], ptr noundef [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: call void @.omp_initializer..25(ptr noundef [[T_VAR]], ptr noundef [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) +// 
CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: call void @.omp_initializer..27(ptr noundef [[VAR1]], ptr noundef [[TMP6]]) +// CHECK1-NEXT: call void @.omp_initializer..29(ptr noundef [[T_VAR1]], ptr noundef [[TMP8]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr 
@_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP18]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP12]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX7]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP25]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR17]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR18]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP9]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// 
CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR13]], ptr noundef [[ADD_PTR14]]) -// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR9]], ptr noundef [[ADD_PTR10]]) +// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR15]], ptr noundef [[ADD_PTR16]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 
[[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR11]], ptr noundef [[ADD_PTR12]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR17]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR4]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -2800,14 +2882,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2816,96 +2895,98 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // 
CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @.init.32, align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr @.init.32, align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // 
CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP16]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX5]], ptr noundef 
nonnull align 4 dereferenceable(12) [[TMP21]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.33, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.33, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call 
void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -2942,15 +3023,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(504) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2959,164 +3036,166 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR4:%.*]] = alloca [40 x %struct.S], align 16 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [40 x %struct.S], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 40 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP2]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..37(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) +// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..37(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR4]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label 
[[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARR4]], i64 [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARR]], i64 [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX10]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP22]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP8]], i64 0, i64 [[IDXPROM8]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX9]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP28]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARR4]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP29]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.38, ptr 
@.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP35]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.38, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i64 4 -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR14]], ptr noundef [[ADD_PTR15]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i64 4 +// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST11]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR13]], ptr noundef [[ADD_PTR14]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP34]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i64 4 -// CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR23]], ptr noundef [[ADD_PTR24]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP34]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done28: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i64 4 +// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR22]], ptr noundef [[ADD_PTR23]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN29:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN29]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN28]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN29]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done30: -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP37]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done29: +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp --- 
a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -41,287 +41,293 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 9 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = add nuw i64 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP12]], align 16 -// CHECK1-NEXT: store i64 [[TMP12]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP15]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP9]], i64 9 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: 
[[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = add nuw i64 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP15]], align 16 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP21]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP24]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 
2 -// CHECK1-NEXT: store i64 4, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = sext i32 [[TMP33]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP34]] -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 9 -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = sub i64 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = sdiv exact i64 [[TMP40]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP42:%.*]] = add nuw i64 [[TMP41]], 1 -// CHECK1-NEXT: [[TMP43:%.*]] = mul nuw i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP43]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// 
CHECK1-NEXT: store i32 1, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP50]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP51]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP38]], i64 9 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP43:%.*]] = sub i64 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[TMP44:%.*]] = sdiv exact i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP45:%.*]] = add nuw i64 [[TMP44]], 1 +// CHECK1-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], ptrtoint (ptr getelementptr (i8, ptr null, i32 
1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP46]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP51]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP53]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP54]], 9 +// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP55]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP56]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP57]], [[TMP58]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = 
icmp sle i64 [[TMP60]], [[TMP61]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP59]], 1 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP63]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP65]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP66]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP69]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP66]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP70]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP72]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP74]], ptr [[TMP66]]) +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[TMP71]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP72]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP73]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP77]], ptr [[TMP69]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP76:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP76]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP79]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP78]]) -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP80]], i32 1) -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP84:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr [[TMP84]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP86]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP87]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP81]]) +// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP83]], i32 1) +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP87:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP89]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP90]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP88]], [[TMP89]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP91]], [[TMP92]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ 
[[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP86]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP89]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP93]] monotonic, align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP96]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP96]] monotonic, align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// 
CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP97:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP102:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP97]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP97]], i8 [[TMP100]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP102]] = extractvalue { i8, i1 } [[TMP101]], 0 -// CHECK1-NEXT: [[TMP103:%.*]] = extractvalue { i8, i1 } [[TMP101]], 1 -// CHECK1-NEXT: br i1 [[TMP103]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP100:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP105:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP100]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP100]], i8 [[TMP103]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP105]] = extractvalue { i8, i1 } [[TMP104]], 0 +// CHECK1-NEXT: [[TMP106:%.*]] = extractvalue { i8, i1 } [[TMP104]], 1 +// CHECK1-NEXT: br i1 [[TMP106]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP86]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// 
CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP89]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP104:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP104]]) -// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP105]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP106]]) +// CHECK1-NEXT: [[TMP107:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP107]]) +// CHECK1-NEXT: [[TMP108:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP109]]) // CHECK1-NEXT: ret void // // @@ -460,20 +466,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -487,7 +493,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], 
ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/irbuilder_safelen.cpp b/clang/test/OpenMP/irbuilder_safelen.cpp --- a/clang/test/OpenMP/irbuilder_safelen.cpp +++ b/clang/test/OpenMP/irbuilder_safelen.cpp @@ -242,3 +242,18 @@ // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void +// +//. +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 45} +// CHECK: !2 = !{!"clang version 17.0.0"} +// CHECK: !3 = distinct !{!3, !4, !5} +// CHECK: !4 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !5 = !{!"llvm.loop.vectorize.width", i32 3} +// CHECK: !6 = distinct !{} +// CHECK: !7 = distinct !{!7, !8, !4} +// CHECK: !8 = !{!"llvm.loop.parallel_accesses", !6} +//. diff --git a/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp b/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp --- a/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp +++ b/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp @@ -242,3 +242,20 @@ // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void +// +//. +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 50} +// CHECK: !2 = !{!"clang version 17.0.0"} +// CHECK: !3 = distinct !{} +// CHECK: !4 = distinct !{!4, !5, !6, !7} +// CHECK: !5 = !{!"llvm.loop.parallel_accesses", !3} +// CHECK: !6 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !7 = !{!"llvm.loop.vectorize.width", i32 3} +// CHECK: !8 = distinct !{} +// CHECK: !9 = distinct !{!9, !10, !6} +// CHECK: !10 = !{!"llvm.loop.parallel_accesses", !8} +//. diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp --- a/clang/test/OpenMP/irbuilder_simd_aligned.cpp +++ b/clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -280,3 +280,22 @@ // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void +// +//. 
+// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 50} +// CHECK: !2 = !{!"clang version 17.0.0"} +// CHECK: !3 = distinct !{!3, !4} +// CHECK: !4 = !{!"llvm.loop.mustprogress"} +// CHECK: !5 = distinct !{} +// CHECK: !6 = distinct !{!6, !7, !8} +// CHECK: !7 = !{!"llvm.loop.parallel_accesses", !5} +// CHECK: !8 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !9 = distinct !{} +// CHECK: !10 = distinct !{!10, !11, !8} +// CHECK: !11 = !{!"llvm.loop.parallel_accesses", !9} +//. diff --git a/clang/test/OpenMP/irbuilder_simdlen.cpp b/clang/test/OpenMP/irbuilder_simdlen.cpp --- a/clang/test/OpenMP/irbuilder_simdlen.cpp +++ b/clang/test/OpenMP/irbuilder_simdlen.cpp @@ -242,3 +242,20 @@ // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void +// +//. +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 45} +// CHECK: !2 = !{!"clang version 17.0.0"} +// CHECK: !3 = distinct !{} +// CHECK: !4 = distinct !{!4, !5, !6, !7} +// CHECK: !5 = !{!"llvm.loop.parallel_accesses", !3} +// CHECK: !6 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !7 = !{!"llvm.loop.vectorize.width", i32 3} +// CHECK: !8 = distinct !{} +// CHECK: !9 = distinct !{!9, !10, !6} +// CHECK: !10 = !{!"llvm.loop.parallel_accesses", !8} +//. diff --git a/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp b/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp --- a/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp +++ b/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp @@ -242,3 +242,18 @@ // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 // CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 // CHECK-NEXT: ret void +// +//. +// CHECK: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +// CHECK: attributes #1 = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } +//. +// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !1 = !{i32 7, !"openmp", i32 45} +// CHECK: !2 = !{!"clang version 17.0.0"} +// CHECK: !3 = distinct !{!3, !4, !5} +// CHECK: !4 = !{!"llvm.loop.vectorize.enable", i1 true} +// CHECK: !5 = !{!"llvm.loop.vectorize.width", i32 2} +// CHECK: !6 = distinct !{} +// CHECK: !7 = distinct !{!7, !8, !4} +// CHECK: !8 = !{!"llvm.loop.parallel_accesses", !6} +//. 
diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp --- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -433,70 +444,68 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// 
CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP18]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP6]], ptr [[TMP14]], i32 1, ptr [[TMP23]], ptr [[TMP24]], i64 [[TMP27]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr 
[[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP23]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP20]], i32 1, ptr [[TMP29]], ptr [[TMP30]], i64 [[TMP33]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -577,7 +586,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -588,7 +597,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp --- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -433,70 +444,68 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// 
CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP18]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP6]], ptr [[TMP14]], i32 1, ptr [[TMP23]], ptr [[TMP24]], i64 [[TMP27]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr 
[[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP23]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP20]], i32 1, ptr [[TMP29]], ptr [[TMP30]], i64 [[TMP33]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -577,7 +586,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -588,7 +597,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/metadirective_device_kind_codegen.c b/clang/test/OpenMP/metadirective_device_kind_codegen.c --- a/clang/test/OpenMP/metadirective_device_kind_codegen.c +++ b/clang/test/OpenMP/metadirective_device_kind_codegen.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple aarch64-unknown-linux -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple ppc64le-unknown-linux -emit-llvm %s -o - | FileCheck %s @@ -7,6 +8,26 @@ void bar(void); +// CHECK-LABEL: @foo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK-NEXT: ret void +// void foo(void) { #pragma omp metadirective when(device = {kind(any)} \ : parallel) @@ -35,46 +56,12 @@ ; } -// CHECK-LABEL: define {{.+}} void @foo() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_1:@.+]] to void -// CHECK-NEXT: @__kmpc_push_num_threads -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_1]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/metadirective_device_kind_codegen.cpp 
b/clang/test/OpenMP/metadirective_device_kind_codegen.cpp --- a/clang/test/OpenMP/metadirective_device_kind_codegen.cpp +++ b/clang/test/OpenMP/metadirective_device_kind_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple aarch64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple ppc64le-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s @@ -8,6 +9,26 @@ void bar(); +// CHECK-LABEL: @_Z3foov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK-NEXT: ret void +// void foo() { #pragma omp metadirective when(device = {kind(any)} \ : parallel) @@ -36,46 +57,12 @@ ; } -// CHECK-LABEL: define {{.+}} void @_Z3foov() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_1:@.+]] to void -// CHECK-NEXT: @__kmpc_push_num_threads -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_1]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/metadirective_implementation_codegen.cpp b/clang/test/OpenMP/metadirective_implementation_codegen.cpp --- a/clang/test/OpenMP/metadirective_implementation_codegen.cpp +++ b/clang/test/OpenMP/metadirective_implementation_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple aarch64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple ppc64le-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s @@ -8,6 +9,22 @@ void bar(); +// 
CHECK-LABEL: @_Z3foov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: ret void +// void foo() { #pragma omp metadirective when(implementation = {vendor(score(0) \ : llvm)}, \ @@ -37,40 +54,15 @@ ; } -// CHECK-LABEL: void @_Z3foov() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( // NO-CHECK: call void @__kmpc_for_static_init // NO-CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( // NO-CHECK: call void @__kmpc_for_static_init // NO-CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -47,51 +47,56 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: ret i32 [[TMP0]] +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]] // CHECK1: for.body: // CHECK1-NEXT: store i32 0, ptr [[K]], align 4 // CHECK1-NEXT: br label [[FOR_COND1:%.*]] // CHECK1: for.cond1: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 5 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 5 // CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body3: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[K]], align 4 // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK1-NEXT: store i32 [[INC4]], ptr [[K]], align 4 // CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC5:%.*]] // CHECK1: for.inc5: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: for.end7: // CHECK1-NEXT: ret void @@ -102,54 +107,60 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, ptr [[I]], ptr [[RES]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4 -// CHECK1-NEXT: ret i32 [[TMP0]] +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[RES]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RES]], align 4 +// CHECK1-NEXT: ret i32 [[TMP2]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]] // CHECK1: for.body: // CHECK1-NEXT: store i32 0, ptr [[K]], align 4 // CHECK1-NEXT: br label [[FOR_COND1:%.*]] // CHECK1: for.cond1: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP3]], 5 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp 
slt i32 [[TMP6]], 5 // CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body3: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP4]], align 4 // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[INC4]], ptr [[K]], align 4 // CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC5:%.*]] // CHECK1: for.inc5: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: for.end7: // CHECK1-NEXT: ret void @@ -160,170 +171,135 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]), !dbg [[DBG16:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG17:![0-9]+]] -// CHECK2-NEXT: ret i32 [[TMP0]], !dbg [[DBG18:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG16:![0-9]+]] +// CHECK2-NEXT: store ptr [[I]], ptr [[TMP0]], align 8, !dbg [[DBG16]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG16]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG17:![0-9]+]] +// CHECK2-NEXT: ret i32 [[TMP1]], !dbg [[DBG18:![0-9]+]] // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG19:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG19:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META29:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG32:![0-9]+]] -// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG33:![0-9]+]] -// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG35:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META32:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG34]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG34]] +// CHECK2-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG35:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG37:![0-9]+]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG36:![0-9]+]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10, !dbg [[DBG38:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG39:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG38:![0-9]+]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10, !dbg [[DBG40:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG41:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]] -// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG44:![0-9]+]] -// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg [[DBG46:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG46:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg 
[[DBG48:![0-9]+]] // CHECK2: for.cond1: -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG47:![0-9]+]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 5, !dbg [[DBG49:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG50:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG49:![0-9]+]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 5, !dbg [[DBG51:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG52:![0-9]+]] // CHECK2: for.body3: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG51:![0-9]+]] -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1, !dbg [[DBG51]] -// CHECK2-NEXT: store i32 [[INC]], ptr [[K]], align 4, !dbg [[DBG51]] -// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG53:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG53:![0-9]+]] +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG53]] +// CHECK2-NEXT: store i32 [[INC]], ptr [[K]], align 4, !dbg [[DBG53]] +// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG55:![0-9]+]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG54]] -// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG54]] -// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG55:![0-9]+]], !llvm.loop [[LOOP56:![0-9]+]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG56:![0-9]+]] +// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG56]] +// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG56]] +// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG57:![0-9]+]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG59:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG61:![0-9]+]] // CHECK2: for.inc5: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG60:![0-9]+]] -// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG60]] -// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4, !dbg [[DBG60]] -// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG61:![0-9]+]], !llvm.loop [[LOOP62:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG62:![0-9]+]] +// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG62]] +// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP2]], align 4, !dbg [[DBG62]] +// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG63:![0-9]+]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK2: for.end7: -// CHECK2-NEXT: ret void, !dbg [[DBG64:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR2]] !dbg [[DBG65:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG70:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG70]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG70]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG70]] -// CHECK2-NEXT: call void @.omp_outlined._debug__(ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]], !dbg [[DBG70]] -// CHECK2-NEXT: ret void, !dbg [[DBG70]] +// CHECK2-NEXT: ret void, !dbg [[DBG66:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv -// CHECK2-SAME: () #[[ATTR0]] !dbg [[DBG73:![0-9]+]] { +// CHECK2-SAME: () #[[ATTR0]] !dbg [[DBG69:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined..2, ptr [[I]], ptr [[RES]]), !dbg [[DBG78:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG79:![0-9]+]] -// CHECK2-NEXT: ret i32 [[TMP0]], !dbg [[DBG80:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR2]] !dbg [[DBG81:![0-9]+]] { +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG74:![0-9]+]] +// CHECK2-NEXT: store ptr [[I]], ptr [[TMP0]], align 8, !dbg [[DBG74]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG74]] +// CHECK2-NEXT: store ptr [[RES]], ptr [[TMP1]], align 8, !dbg [[DBG74]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG74]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG75:![0-9]+]] +// CHECK2-NEXT: ret i32 [[TMP2]], !dbg [[DBG76:![0-9]+]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] !dbg [[DBG77:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85]] -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG88:![0-9]+]] -// CHECK2-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES_ADDR]], metadata [[META89:![0-9]+]], metadata !DIExpression()), !dbg [[DBG90:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG91:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG91]] -// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG92:![0-9]+]] -// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG94:![0-9]+]] +// CHECK2-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG88:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG88]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG88]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG88]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG88]] +// CHECK2-NEXT: store i32 0, ptr [[TMP2]], align 4, !dbg [[DBG89:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG91:![0-9]+]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG95:![0-9]+]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG97:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG98:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP5]], 10, !dbg [[DBG94:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG95:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]] -// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG103]] -// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg [[DBG104:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100:![0-9]+]] +// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG100]] +// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg [[DBG101:![0-9]+]] // CHECK2: for.cond1: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP3]], 5, !dbg [[DBG107:![0-9]+]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG108:![0-9]+]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG102:![0-9]+]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 5, !dbg [[DBG104:![0-9]+]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG105:![0-9]+]] // CHECK2: for.body3: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG109:![0-9]+]] -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG109]] -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4, !dbg [[DBG109]] -// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG111:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG106:![0-9]+]] +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG106]] +// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP4]], align 4, !dbg [[DBG106]] +// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG108:![0-9]+]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG112:![0-9]+]] -// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG112]] -// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG112]] -// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG113:![0-9]+]], !llvm.loop [[LOOP114:![0-9]+]] +// CHECK2-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG109:![0-9]+]] +// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP8]], 1, !dbg [[DBG109]] +// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG109]] +// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG110:![0-9]+]], !llvm.loop [[LOOP111:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG116:![0-9]+]] +// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG113:![0-9]+]] // CHECK2: for.inc5: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG117:![0-9]+]] -// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG117]] -// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4, !dbg [[DBG117]] -// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG118:![0-9]+]], !llvm.loop [[LOOP119:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG114:![0-9]+]] +// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP9]], 1, !dbg [[DBG114]] +// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP2]], align 4, !dbg [[DBG114]] +// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG115:![0-9]+]], !llvm.loop [[LOOP116:![0-9]+]] // CHECK2: for.end7: -// CHECK2-NEXT: ret void, !dbg [[DBG121:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR2]] !dbg [[DBG122:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124]] -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124]] -// CHECK2-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG128:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG128]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG128]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG128]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG128]] -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG128]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.1(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]]) #[[ATTR3]], !dbg [[DBG128]] -// CHECK2-NEXT: ret void, !dbg [[DBG128]] +// CHECK2-NEXT: ret void, !dbg [[DBG118:![0-9]+]] // // // CHECK3-LABEL: define {{[^@]+}}@_Z12outline_declv diff --git 
a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -276,7 +276,7 @@ // CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -286,15 +286,14 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[TMP2]], align 1 // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-64-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -302,179 +301,195 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_16:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG19:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-64-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK-64-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-64: omp_if.then: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP9]], 10 +// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK-64-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK-64-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK-64-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK-64-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP19]] to i1 -// CHECK-64-NEXT: 
[[TMP20:%.*]] = zext i1 [[TOBOOL4]] to i32 -// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP20]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP21]], i64 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK-64-NEXT: store i64 [[TMP13]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i64 [[TMP15]], i64* [[TMP14]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i64 [[TMP17]], i64* [[TMP16]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK-64-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP18]], align 8, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP20:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP21:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24), !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP20]], i8* [[TMP21]]), !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to %struct.anon.0* +// CHECK-64-NEXT: store [[STRUCT_ANON_0]] [[TMP23]], %struct.anon.0* [[TMP24]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP25]] to i1 +// CHECK-64-NEXT: [[TMP26:%.*]] = zext i1 [[TOBOOL5]] to i32 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 [[TMP26]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP22]]), !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP21]], i64 24), !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: 
omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP28]], 9 -// CHECK-64-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] -// CHECK-64: cond.true8: -// CHECK-64-NEXT: br label [[COND_END10:%.*]] -// CHECK-64: cond.false9: -// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: br label [[COND_END10]] -// CHECK-64: cond.end10: -// CHECK-64-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] -// CHECK-64-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-64-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK-64-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP33]], 9 +// CHECK-64-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK-64: cond.true9: +// 
CHECK-64-NEXT: br label [[COND_END11:%.*]] +// CHECK-64: cond.false10: +// CHECK-64-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: br label [[COND_END11]] +// CHECK-64: cond.end11: +// CHECK-64-NEXT: [[COND12:%.*]] = phi i32 [ 9, [[COND_TRUE9]] ], [ [[TMP34]], [[COND_FALSE10]] ] +// CHECK-64-NEXT: store i32 [[COND12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-64-NEXT: store i32 [[TMP35]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-64: omp_if.else: -// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] -// CHECK-64: omp.inner.for.cond12: -// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP31]], 10 -// CHECK-64-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK-64: omp.inner.for.body14: -// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64 -// CHECK-64-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP35:%.*]] = zext i32 [[TMP34]] to i64 -// CHECK-64-NEXT: [[TMP36:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP36]] to i1 -// CHECK-64-NEXT: [[CONV17:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED16]] to i8* -// CHECK-64-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 -// CHECK-64-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 -// CHECK-64-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED16]], align 8 -// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP39:%.*]] = inttoptr i64 [[TMP33]] to i8* -// CHECK-64-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 8 -// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP35]] to i8* -// CHECK-64-NEXT: store i8* [[TMP41]], i8** [[TMP40]], align 8 -// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i64 0, i64 2 -// CHECK-64-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP37]] to i8* -// CHECK-64-NEXT: store i8* [[TMP43]], i8** [[TMP42]], align 8 -// CHECK-64-NEXT: [[TMP44:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP44]] to i1 -// CHECK-64-NEXT: [[TMP45:%.*]] = zext i1 [[TOBOOL20]] to i32 -// CHECK-64-NEXT: [[TMP46:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP45]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP46]], i64 3) +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] +// CHECK-64: omp.inner.for.cond13: +// CHECK-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP14:%.*]] = icmp slt i32 [[TMP36]], 10 +// CHECK-64-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END30:%.*]] +// 
CHECK-64: omp.inner.for.body15: +// CHECK-64-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP38:%.*]] = zext i32 [[TMP37]] to i64 +// CHECK-64-NEXT: store i64 [[TMP38]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP40:%.*]] = zext i32 [[TMP39]] to i64 +// CHECK-64-NEXT: store i64 [[TMP40]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP42]], i64* [[TMP41]], align 8 +// CHECK-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP44]], i64* [[TMP43]], align 8 +// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP46:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-64-NEXT: [[TOBOOL17:%.*]] = trunc i8 [[TMP46]] to i1 +// CHECK-64-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL17]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL18]], i8* [[TMP45]], align 8 +// CHECK-64-NEXT: [[TMP47:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG19]] to i8* +// CHECK-64-NEXT: [[TMP48:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24) +// CHECK-64-NEXT: [[TMP49:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP47]], i8* [[TMP48]]) +// CHECK-64-NEXT: [[TMP50:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], align 1 +// CHECK-64-NEXT: [[TMP51:%.*]] = bitcast i8* [[TMP49]] to %struct.anon.0* +// CHECK-64-NEXT: store [[STRUCT_ANON_0]] [[TMP50]], %struct.anon.0* [[TMP51]], align 1 +// CHECK-64-NEXT: [[TMP52:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-64-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP52]] to i1 +// CHECK-64-NEXT: [[TMP53:%.*]] = zext i1 [[TOBOOL20]] to i32 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 [[TMP53]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__2 to i8*), i8* null, i8* [[TMP49]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP48]], i64 24) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] // CHECK-64: omp.inner.for.inc21: -// CHECK-64-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK-64-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP54]], [[TMP55]] // CHECK-64-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] +// CHECK-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD23:%.*]] = add nsw i32 
[[TMP56]], [[TMP57]] // CHECK-64-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP51]], [[TMP52]] +// CHECK-64-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP58]], [[TMP59]] // CHECK-64-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP53]], 9 +// CHECK-64-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP60]], 9 // CHECK-64-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] // CHECK-64: cond.true26: // CHECK-64-NEXT: br label [[COND_END28:%.*]] // CHECK-64: cond.false27: -// CHECK-64-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END28]] // CHECK-64: cond.end28: -// CHECK-64-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP54]], [[COND_FALSE27]] ] +// CHECK-64-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP61]], [[COND_FALSE27]] ] // CHECK-64-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] +// CHECK-64-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP62]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP134:![0-9]+]] // CHECK-64: omp.inner.for.end30: // CHECK-64-NEXT: br label [[OMP_IF_END]] // CHECK-64: omp_if.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK-64-NEXT: br i1 [[TMP57]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK-64-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 +// CHECK-64-NEXT: br i1 [[TMP64]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -483,13 +498,14 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // 
CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -499,77 +515,86 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], 
align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-64-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-64-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-64: omp_if.then: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] -// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP136]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// 
CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-64: omp_if.else: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5:%.*]] // CHECK-64: omp.inner.for.cond5: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP15]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP22]] // CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END14:%.*]] // CHECK-64: omp.inner.for.body8: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] // CHECK-64-NEXT: store i32 [[ADD10]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE11:%.*]] // CHECK-64: omp.body.continue11: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC12:%.*]] // CHECK-64: omp.inner.for.inc12: -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP139:![0-9]+]] // CHECK-64: omp.inner.for.end14: @@ -577,12 +602,12 @@ // CHECK-64: omp_if.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -591,13 +616,14 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -607,77 +633,86 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-64-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 8 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-64-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-64-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-64-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-64: omp_if.then: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] -// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* 
[[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP140]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-64: omp_if.else: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5:%.*]] // CHECK-64: omp.inner.for.cond5: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP15]] +// CHECK-64-NEXT: 
[[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV6:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP22]] // CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END14:%.*]] // CHECK-64: omp.inner.for.body8: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] // CHECK-64-NEXT: store i32 [[ADD10]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE11:%.*]] // CHECK-64: omp.body.continue11: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC12:%.*]] // CHECK-64: omp.inner.for.inc12: -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND5]], !llvm.loop [[LOOP143:![0-9]+]] // CHECK-64: omp.inner.for.end14: @@ -685,12 +720,12 @@ // CHECK-64: omp_if.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -701,6 +736,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -710,7 +746,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* 
[[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -718,94 +754,108 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] 
= load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.2* +// CHECK-64-NEXT: store [[STRUCT_ANON_2]] [[TMP18]], %struct.anon.2* [[TMP19]], align 8, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__4 to i8*), i8* null, i8* [[TMP17]]), !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16), !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -814,12 +864,13 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] 
= alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -829,60 +880,66 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-64-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// 
CHECK-64-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -893,6 +950,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -902,7 +960,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -910,94 +968,108 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: 
[[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// 
CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__6 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.4* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.4* +// CHECK-64-NEXT: store [[STRUCT_ANON_4]] [[TMP18]], %struct.anon.4* [[TMP19]], align 8, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.4*)* @__omp_outlined__6 to i8*), i8* null, i8* [[TMP17]]), !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16), !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-64-NEXT: br i1 [[TMP30]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1006,12 +1078,13 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1021,51 +1094,57 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP153]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, 
i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1076,6 +1155,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -1085,7 +1165,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -1093,94 +1173,108 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: 
[[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: 
store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__8 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.6* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.6* +// CHECK-64-NEXT: store [[STRUCT_ANON_6]] [[TMP18]], %struct.anon.6* [[TMP19]], align 8, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.6*)* @__omp_outlined__8 to i8*), i8* null, i8* [[TMP17]]), !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16), !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: 
[[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], 
align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1189,12 +1283,13 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1204,48 +1299,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load 
i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group 
[[ACC_GRP159]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -1253,9 +1354,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1266,6 +1367,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -1275,7 +1377,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -1283,94 +1385,108 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.8* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.8* +// CHECK-64-NEXT: store [[STRUCT_ANON_8]] [[TMP18]], %struct.anon.8* [[TMP19]], align 8, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.8*)* @__omp_outlined__10 to i8*), i8* null, i8* [[TMP17]]), !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16), !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1379,12 +1495,13 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1394,48 +1511,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -1443,9 +1566,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1456,6 +1579,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -1465,7 +1589,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -1473,94 +1597,108 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__11 
-// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_10]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// 
CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group 
[[ACC_GRP168]] +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.10* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.10* +// CHECK-64-NEXT: store [[STRUCT_ANON_10]] [[TMP18]], %struct.anon.10* [[TMP19]], align 8, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.10*)* @__omp_outlined__12 to i8*), i8* null, i8* [[TMP17]]), !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16), !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1569,12 +1707,13 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__12 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1584,48 +1723,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // 
CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne 
i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -1633,9 +1778,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1646,6 +1791,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -1655,7 +1801,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -1663,94 +1809,108 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__13 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_12]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__14 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.12* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]), !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.12* +// CHECK-64-NEXT: store [[STRUCT_ANON_12]] [[TMP18]], %struct.anon.12* [[TMP19]], align 8, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.12*)* @__omp_outlined__14 to i8*), i8* null, i8* [[TMP17]]), !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16), !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1759,12 +1919,13 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__14 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef 
[[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1774,48 +1935,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] 
= load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -1823,9 +1990,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 
[[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1837,7 +2004,7 @@ // CHECK-64-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 @@ -1847,13 +2014,12 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -1861,24 +2027,30 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__15 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 8 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_14]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK-64-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) // CHECK-64-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1886,85 +2058,89 @@ // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* -// CHECK-64-NEXT: store i32 [[TMP10]], i32* [[CONV3]], align 4 -// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 8 -// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK-64-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to i8* -// CHECK-64-NEXT: store i8* [[TMP17]], i8** [[TMP16]], align 8 -// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64)* @__omp_outlined__16 to i8*), i8* null, i8** [[TMP18]], i64 3) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP16]], i64* [[TMP15]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-64-NEXT: store i32 [[TMP18]], i32* [[TMP17]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast %struct.anon.14* 
[[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP20:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 24) +// CHECK-64-NEXT: [[TMP21:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP19]], i8* [[TMP20]]) +// CHECK-64-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to %struct.anon.14* +// CHECK-64-NEXT: store [[STRUCT_ANON_14]] [[TMP22]], %struct.anon.14* [[TMP23]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.14*)* @__omp_outlined__16 to i8*), i8* null, i8* [[TMP21]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP20]], i64 24) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP25]], 9 -// CHECK-64-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK-64: cond.true7: -// CHECK-64-NEXT: br label [[COND_END9:%.*]] -// CHECK-64: cond.false8: -// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: br label [[COND_END9]] -// CHECK-64: cond.end9: -// CHECK-64-NEXT: [[COND10:%.*]] = phi i32 [ 9, [[COND_TRUE7]] ], [ [[TMP26]], [[COND_FALSE8]] ] -// CHECK-64-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP30]], 9 +// CHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label 
[[COND_FALSE7:%.*]] +// CHECK-64: cond.true6: +// CHECK-64-NEXT: br label [[COND_END8:%.*]] +// CHECK-64: cond.false7: +// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: br label [[COND_END8]] +// CHECK-64: cond.end8: +// CHECK-64-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP31]], [[COND_FALSE7]] ] +// CHECK-64-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK-64-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK-64-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-64: .omp.lastprivate.then: -// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-64-NEXT: store i32 [[TMP30]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP35:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-64-NEXT: store i32 [[TMP35]], i32* [[A]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-64: .omp.lastprivate.done: // CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i64 4) @@ -1972,75 +2148,83 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__16 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[A3:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[A2:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: 
store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 8 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], 
align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV4:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP6]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP13]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-64-NEXT: store i32 [[TMP8]], i32* [[A3]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-64-NEXT: store i32 [[TMP15]], i32* [[A2]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-64: .omp.lastprivate.then: -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK-64-NEXT: store i32 [[TMP13]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[A2]], align 4 +// CHECK-64-NEXT: store i32 [[TMP20]], i32* [[A]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-64: .omp.lastprivate.done: // CHECK-64-NEXT: ret void @@ -2049,6 +2233,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40 // 
CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -2058,7 +2243,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2066,101 +2251,116 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__17 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_17]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], 
i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__18 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext 
i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.17* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_17]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.17* +// CHECK-64-NEXT: store [[STRUCT_ANON_17]] [[TMP18]], %struct.anon.17* [[TMP19]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.17*)* @__omp_outlined__18 to i8*), i8* null, i8* [[TMP17]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br 
label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__18 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2170,63 +2370,70 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], %struct.anon.17* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 
0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -2236,7 +2443,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2244,101 +2451,116 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__19 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_19]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: 
[[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__20 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.19* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_19]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.19* +// CHECK-64-NEXT: store [[STRUCT_ANON_19]] [[TMP18]], %struct.anon.19* [[TMP19]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.19*)* @__omp_outlined__20 to i8*), i8* null, i8* [[TMP17]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, 
i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__20 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.19* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.19*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2348,54 +2570,61 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.19* [[__CONTEXT]], %struct.anon.19** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], %struct.anon.19* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], %struct.anon.19* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], 
align 8 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -2405,7 +2634,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2413,101 +2642,116 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__21 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // 
CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_21]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__22 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.21* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_21]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.21* +// CHECK-64-NEXT: store [[STRUCT_ANON_21]] [[TMP18]], %struct.anon.21* [[TMP19]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.21*)* @__omp_outlined__22 to i8*), i8* null, i8* [[TMP17]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* 
[[TMP16]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__22 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias 
noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2517,48 +2761,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], %struct.anon.21* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -2572,6 +2822,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -2581,7 +2832,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2589,101 +2840,116 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__23 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_23]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__24 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.23* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_23]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.23* +// CHECK-64-NEXT: store [[STRUCT_ANON_23]] [[TMP18]], %struct.anon.23* [[TMP19]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.23*)* @__omp_outlined__24 to i8*), i8* null, i8* [[TMP17]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// 
CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__24 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2693,48 +2959,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], %struct.anon.23* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: 
store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -2748,6 +3020,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -2757,7 +3030,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2765,101 +3038,116 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__25 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_25]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = 
load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__26 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.25* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_25]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.25* +// CHECK-64-NEXT: store [[STRUCT_ANON_25]] [[TMP18]], %struct.anon.25* [[TMP19]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.25*)* @__omp_outlined__26 to i8*), i8* null, i8* [[TMP17]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__26 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = 
alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2869,48 +3157,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], %struct.anon.25* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], %struct.anon.25* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp 
ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -2924,6 +3218,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -2933,7 +3228,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// 
CHECK-64-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2941,101 +3236,116 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__27 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.26* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.26*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_27]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.26* [[__CONTEXT]], %struct.anon.26** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__28 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: store i64 [[TMP14]], i64* [[TMP13]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.27* 
[[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP16:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP15]], i8* [[TMP16]]) +// CHECK-64-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_27]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP17]] to %struct.anon.27* +// CHECK-64-NEXT: store [[STRUCT_ANON_27]] [[TMP18]], %struct.anon.27* [[TMP19]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.27*)* @__omp_outlined__28 to i8*), i8* null, i8* [[TMP17]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP16]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP26]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP27]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__28 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.27* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.27*, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3045,48 +3355,54 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.27* [[__CONTEXT]], %struct.anon.27** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.27*, %struct.anon.27** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27:%.*]], %struct.anon.27* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], %struct.anon.27* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* 
[[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -3100,6 +3416,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -3109,7 +3426,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -3117,10 +3434,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__29 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.28* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.28*, align 8 // CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3128,84 +3446,95 @@ // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_29:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_29]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.28* [[__CONTEXT]], %struct.anon.28** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR]], 
align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group 
[[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__30 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_29]], %struct.anon.29* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_29]], %struct.anon.29* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.29* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_29]], %struct.anon.29* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.29* +// CHECK-64-NEXT: store [[STRUCT_ANON_29]] [[TMP16]], %struct.anon.29* [[TMP17]], align 8, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.29*)* @__omp_outlined__30 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16), !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] 
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], 
i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-64-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3214,12 +3543,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__30 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.29* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.29*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3229,51 +3557,55 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.29* [[__CONTEXT]], %struct.anon.29** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.29*, %struct.anon.29** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_29:%.*]], %struct.anon.29* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_29]], %struct.anon.29* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 
[[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP195]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP4]], align 8, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: 
call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3284,6 +3616,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_30:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -3293,7 +3626,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.30* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -3301,10 +3634,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__31 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.30* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.30*, align 8 // CHECK-64-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3312,86 +3646,97 @@ // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_31]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* -// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i64 12, i1 false) +// CHECK-64-NEXT: store %struct.anon.30* [[__CONTEXT]], %struct.anon.30** [[__CONTEXT_ADDR]], align 
8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.30*, %struct.anon.30** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [3 x i32]* [[B]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i64 12, i1 false) // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: 
[[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK-64-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__32 to i8*), i8* null, i8** [[TMP15]], i64 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_31]], %struct.anon.31* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_31]], %struct.anon.31* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.31* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP15:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP16:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP14]], i8* [[TMP15]]), !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANON_31]], %struct.anon.31* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP16]] to %struct.anon.31* +// CHECK-64-NEXT: store [[STRUCT_ANON_31]] [[TMP17]], %struct.anon.31* [[TMP18]], align 8, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.31*)* @__omp_outlined__32 to i8*), i8* null, i8* [[TMP16]]), !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP15]], i64 16), !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-64-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-64-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// 
CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK-64-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3400,12 +3745,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__32 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.31* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.31*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3415,60 +3759,64 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.31* [[__CONTEXT]], %struct.anon.31** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.31*, %struct.anon.31** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_31:%.*]], %struct.anon.31* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_31]], %struct.anon.31* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* 
[[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] -// 
CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-64-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3479,6 +3827,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_32:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -3488,7 +3837,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.32* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -3496,10 +3845,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__33 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.32* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.32*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -3507,9 +3857,16 @@ // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// 
CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_33:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_33]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.32* [[__CONTEXT]], %struct.anon.32** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.32*, %struct.anon.32** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK:%.*]] = bitcast i8* [[DOTCAPTURED_OMP_PREVIOUS_LB]] to i64* +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK:%.*]] = bitcast i8* [[DOTCAPTURED_OMP_PREVIOUS_UB]] to i64* // CHECK-64-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) // CHECK-64-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** // CHECK-64-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 4) @@ -3518,55 +3875,61 @@ // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP204:![0-9]+]] -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK-64-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK-64-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8** [[TMP15]], i64 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: store i64 [[TMP11]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_33]], %struct.anon.33* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK]], i64** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_33]], %struct.anon.33* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK]], i64** [[TMP13]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.33* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP15:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP16:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP14]], i8* [[TMP15]]), !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANON_33]], %struct.anon.33* [[OMP_OUTLINED_ARG_AGG_]], align 8, 
!llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP16]] to %struct.anon.33* +// CHECK-64-NEXT: store [[STRUCT_ANON_33]] [[TMP17]], %struct.anon.33* [[TMP18]], align 8, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.33*)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8* [[TMP16]]), !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP15]], i64 16), !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3574,16 +3937,17 @@ // CHECK-64-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 8 // CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[B]], i64 4) // CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[C]], i64 8) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64 8) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64 8) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__34 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.33* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.33*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3593,51 +3957,55 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.33* [[__CONTEXT]], %struct.anon.33** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.33*, %struct.anon.33** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_33:%.*]], %struct.anon.33* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_33]], %struct.anon.33* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* 
[[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP207]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP4]], align 8, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3651,25 +4019,22 @@ // CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-64-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 8 // CHECK-64-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-64-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-64-NEXT: 
[[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i64* -// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* -// CHECK-64-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 -// CHECK-64-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i64 [[TMP5]], i64 [[TMP8]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.33** +// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.33** [[TMP3]] to %struct.anon.33* +// CHECK-64-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.33* [[TMP4]]) #[[ATTR2]] // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -3679,7 +4044,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.34* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -3687,94 +4052,106 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__35 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.34* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.34*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_35:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_35]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.34* [[__CONTEXT]], %struct.anon.34** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.34*, %struct.anon.34** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// 
CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__36 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_35]], %struct.anon.35* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_35]], %struct.anon.35* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.35* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_35]], %struct.anon.35* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.35* +// CHECK-64-NEXT: store [[STRUCT_ANON_35]] [[TMP16]], %struct.anon.35* [[TMP17]], align 8, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.35*)* @__omp_outlined__36 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16), !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 
4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-64-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3783,12 +4160,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__36 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.35* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.35*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3798,48 +4174,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.35* [[__CONTEXT]], %struct.anon.35** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.35*, %struct.anon.35** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_35:%.*]], %struct.anon.35* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_35]], %struct.anon.35* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // 
CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: 
br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -3847,9 +4227,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3860,6 +4240,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_36:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -3869,7 +4250,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.36* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -3877,94 +4258,106 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__37 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.36* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.36*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_37]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.36* [[__CONTEXT]], %struct.anon.36** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.36*, %struct.anon.36** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp 
slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__38 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_37]], %struct.anon.37* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_37]], %struct.anon.37* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.37* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_37]], %struct.anon.37* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.37* +// CHECK-64-NEXT: store [[STRUCT_ANON_37]] [[TMP16]], %struct.anon.37* [[TMP17]], align 8, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.37*)* @__omp_outlined__38 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16), !llvm.access.group 
[[ACC_GRP216]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-64-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3973,12 +4366,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__38 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.37* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.37*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3988,48 +4380,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.37* [[__CONTEXT]], %struct.anon.37** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.37*, %struct.anon.37** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_37:%.*]], %struct.anon.37* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_37]], %struct.anon.37* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// 
CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -4037,9 +4433,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4050,6 +4446,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_38:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -4059,7 +4456,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.38* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -4067,94 +4464,106 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__39 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.38* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.38*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_39:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_39]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.38* [[__CONTEXT]], %struct.anon.38** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.38*, %struct.anon.38** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__40 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_39]], %struct.anon.39* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_39]], %struct.anon.39* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.39* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_39]], %struct.anon.39* [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.39* +// CHECK-64-NEXT: store [[STRUCT_ANON_39]] [[TMP16]], %struct.anon.39* [[TMP17]], align 8, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.39*)* @__omp_outlined__40 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16), !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-64-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4163,12 +4572,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__40 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.39* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.39*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4178,48 +4586,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.39* [[__CONTEXT]], %struct.anon.39** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.39*, %struct.anon.39** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_39:%.*]], %struct.anon.39* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_39]], %struct.anon.39* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // 
CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: 
omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -4227,9 +4639,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4240,6 +4652,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_40:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -4249,7 +4662,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.40* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -4257,94 +4670,106 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__41 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.40* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.40*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_41:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_41]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.40* [[__CONTEXT]], %struct.anon.40** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.40*, %struct.anon.40** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__42 to i8*), i8* null, i8** [[TMP14]], i64 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_41]], %struct.anon.41* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_41]], %struct.anon.41* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.41* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16), !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_41]], %struct.anon.41* 
[[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.41* +// CHECK-64-NEXT: store [[STRUCT_ANON_41]] [[TMP16]], %struct.anon.41* [[TMP17]], align 8, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.41*)* @__omp_outlined__42 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16), !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: br label 
[[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-64-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4353,12 +4778,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__42 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.41* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.41*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4368,48 +4792,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.41* [[__CONTEXT]], %struct.anon.41** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.41*, %struct.anon.41** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_41:%.*]], %struct.anon.41* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: 
[[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_41]], %struct.anon.41* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP231:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -4417,9 +4845,9 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4430,6 +4858,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_42:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -4439,7 +4868,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.42* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -4447,101 +4876,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__43 -// CHECK-64-SAME: (i32* noalias noundef 
[[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.42* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.42*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_43:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_43]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.42* [[__CONTEXT]], %struct.anon.42** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.42*, %struct.anon.42** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__44 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_43]], %struct.anon.43* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_43]], %struct.anon.43* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.43* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_43]], %struct.anon.43* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.43* +// CHECK-64-NEXT: store [[STRUCT_ANON_43]] [[TMP16]], %struct.anon.43* [[TMP17]], align 8 +// CHECK-64-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.43*)* @__omp_outlined__44 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define 
{{[^@]+}}@__omp_outlined__44 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.43* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.43*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4551,54 +4991,59 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.43* [[__CONTEXT]], %struct.anon.43** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.43*, %struct.anon.43** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_43:%.*]], %struct.anon.43* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_43]], %struct.anon.43* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void 
@__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_44:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -4608,7 +5053,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.44* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -4616,101 +5061,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__45 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.44* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.44*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_45:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_45]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.44* [[__CONTEXT]], %struct.anon.44** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.44*, %struct.anon.44** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__46 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_45]], %struct.anon.45* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_45]], %struct.anon.45* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.45* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_45]], %struct.anon.45* 
[[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.45* +// CHECK-64-NEXT: store [[STRUCT_ANON_45]] [[TMP16]], %struct.anon.45* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.45*)* @__omp_outlined__46 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void 
@__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__46 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.45* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.45*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4720,63 +5176,68 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.45* [[__CONTEXT]], %struct.anon.45** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.45*, %struct.anon.45** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_45:%.*]], %struct.anon.45* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_45]], %struct.anon.45* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// 
CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_46:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -4786,7 +5247,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.46* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -4794,101 +5255,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__47 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.46* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.46*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_47:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_47]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.46* [[__CONTEXT]], %struct.anon.46** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.46*, %struct.anon.46** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__48 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_47]], %struct.anon.47* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_47]], %struct.anon.47* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.47* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_47]], %struct.anon.47* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.47* +// CHECK-64-NEXT: store [[STRUCT_ANON_47]] [[TMP16]], %struct.anon.47* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.47*)* @__omp_outlined__48 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__48 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.47* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.47*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4898,54 +5370,59 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.47* [[__CONTEXT]], %struct.anon.47** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.47*, %struct.anon.47** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_47:%.*]], %struct.anon.47* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_47]], %struct.anon.47* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// 
CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) 
// CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_48:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -4955,7 +5432,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.48* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -4963,101 +5440,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__49 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.48* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.48*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_49:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_49]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.48* [[__CONTEXT]], %struct.anon.48** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.48*, %struct.anon.48** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void 
@__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__50 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_49]], %struct.anon.49* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_49]], %struct.anon.49* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.49* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_49]], %struct.anon.49* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.49* +// CHECK-64-NEXT: store [[STRUCT_ANON_49]] [[TMP16]], %struct.anon.49* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.49*)* @__omp_outlined__50 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: 
cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__50 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.49* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.49*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5067,48 +5555,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.49* [[__CONTEXT]], %struct.anon.49** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.49*, %struct.anon.49** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_49:%.*]], %struct.anon.49* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_49]], %struct.anon.49* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -5122,6 +5614,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_50:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -5131,7 +5624,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.50* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -5139,101 +5632,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__51 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.50* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.50*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_51:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca 
[[STRUCT_ANON_51]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.50* [[__CONTEXT]], %struct.anon.50** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.50*, %struct.anon.50** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__52 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_51]], %struct.anon.51* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_51]], %struct.anon.51* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.51* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_51]], %struct.anon.51* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.51* +// CHECK-64-NEXT: store [[STRUCT_ANON_51]] [[TMP16]], %struct.anon.51* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.51*)* @__omp_outlined__52 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 
[[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__52 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.51* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.51*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5243,48 +5747,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], 
i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.51* [[__CONTEXT]], %struct.anon.51** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.51*, %struct.anon.51** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_51:%.*]], %struct.anon.51* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_51]], %struct.anon.51* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// 
CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -5298,6 +5806,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_52:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -5307,7 +5816,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.52* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -5315,101 +5824,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__53 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.52* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.52*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_53:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_53]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.52* [[__CONTEXT]], %struct.anon.52** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.52*, %struct.anon.52** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// 
CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__54 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_53]], %struct.anon.53* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_53]], %struct.anon.53* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.53* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_53]], %struct.anon.53* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.53* +// CHECK-64-NEXT: store [[STRUCT_ANON_53]] [[TMP16]], %struct.anon.53* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.53*)* @__omp_outlined__54 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__54 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.53* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // 
CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.53*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5419,48 +5939,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.53* [[__CONTEXT]], %struct.anon.53** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.53*, %struct.anon.53** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_53:%.*]], %struct.anon.53* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_53]], %struct.anon.53* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], 
i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -5474,6 +5998,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_54:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -5483,7 +6008,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* 
[[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.54* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -5491,101 +6016,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__55 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.54* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.54*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_55:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_55]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.54* [[__CONTEXT]], %struct.anon.54** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.54*, %struct.anon.54** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 -// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* 
[[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__56 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_55]], %struct.anon.55* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_55]], %struct.anon.55* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.55* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_55]], %struct.anon.55* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.55* +// CHECK-64-NEXT: store [[STRUCT_ANON_55]] [[TMP16]], %struct.anon.55* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.55*)* @__omp_outlined__56 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__56 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.55* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.55*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5595,48 +6131,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.55* [[__CONTEXT]], %struct.anon.55** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.55*, %struct.anon.55** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_55:%.*]], %struct.anon.55* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_55]], %struct.anon.55* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, 
i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -5650,6 +6190,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_56:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -5659,7 +6200,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.56* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -5667,101 +6208,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__57 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.56* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.56*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_57:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_57]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.56* [[__CONTEXT]], %struct.anon.56** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.56*, %struct.anon.56** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // 
CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* 
bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__58 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_57]], %struct.anon.57* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_57]], %struct.anon.57* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.57* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_57]], %struct.anon.57* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.57* +// CHECK-64-NEXT: store [[STRUCT_ANON_57]] [[TMP16]], %struct.anon.57* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.57*)* @__omp_outlined__58 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: 
[[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__58 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.57* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.57*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5771,54 +6323,59 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.57* [[__CONTEXT]], %struct.anon.57** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.57*, %struct.anon.57** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_57:%.*]], %struct.anon.57* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_57]], %struct.anon.57* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* 
[[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 
4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_58:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -5828,7 +6385,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.58* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -5836,101 +6393,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__59 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.58* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.58*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_59:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_59]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.58* [[__CONTEXT]], %struct.anon.58** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.58*, %struct.anon.58** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // 
CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// 
CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__60 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_59]], %struct.anon.59* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_59]], %struct.anon.59* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.59* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_59]], %struct.anon.59* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.59* +// CHECK-64-NEXT: store [[STRUCT_ANON_59]] [[TMP16]], %struct.anon.59* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.59*)* @__omp_outlined__60 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add 
nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__60 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.59* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.59*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5940,63 +6508,68 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.59* [[__CONTEXT]], %struct.anon.59** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.59*, %struct.anon.59** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_59:%.*]], %struct.anon.59* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_59]], %struct.anon.59* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_60:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -6006,7 +6579,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.60* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -6014,101 +6587,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__61 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.60* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.60*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 
+// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_61:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_61]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.60* [[__CONTEXT]], %struct.anon.60** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.60*, %struct.anon.60** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__62 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_61]], %struct.anon.61* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_61]], %struct.anon.61* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.61* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_61]], %struct.anon.61* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.61* +// CHECK-64-NEXT: store [[STRUCT_ANON_61]] [[TMP16]], %struct.anon.61* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.61*)* @__omp_outlined__62 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__62 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.61* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.61*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6118,54 
+6702,59 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.61* [[__CONTEXT]], %struct.anon.61** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.61*, %struct.anon.61** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_61:%.*]], %struct.anon.61* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_61]], %struct.anon.61* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP11]] // 
CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_62:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -6175,7 +6764,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.62* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -6183,101 +6772,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__63 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.62* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.62*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 
// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_63:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_63]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.62* [[__CONTEXT]], %struct.anon.62** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.62*, %struct.anon.62** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// 
CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__64 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_63]], %struct.anon.63* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_63]], %struct.anon.63* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.63* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_63]], %struct.anon.63* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.63* +// CHECK-64-NEXT: store [[STRUCT_ANON_63]] [[TMP16]], %struct.anon.63* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.63*)* @__omp_outlined__64 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__64 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.63* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
%struct.anon.63*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6287,48 +6887,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.63* [[__CONTEXT]], %struct.anon.63** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.63*, %struct.anon.63** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_63:%.*]], %struct.anon.63* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_63]], %struct.anon.63* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* 
[[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -6342,6 +6946,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_64:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -6351,7 +6956,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.64* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -6359,101 +6964,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__65 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.64* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.64*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_65:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_65]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.64* [[__CONTEXT]], %struct.anon.64** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.64*, %struct.anon.64** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__66 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_65]], %struct.anon.65* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_65]], %struct.anon.65* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.65* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_65]], %struct.anon.65* 
[[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.65* +// CHECK-64-NEXT: store [[STRUCT_ANON_65]] [[TMP16]], %struct.anon.65* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.65*)* @__omp_outlined__66 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void 
@__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__66 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.65* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.65*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6463,48 +7079,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.65* [[__CONTEXT]], %struct.anon.65** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.65*, %struct.anon.65** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_65:%.*]], %struct.anon.65* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_65]], %struct.anon.65* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, 
i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -6518,6 +7138,7 @@ // CHECK-64-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_66:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -6527,7 +7148,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.66* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -6535,101 +7156,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__67 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.66* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.66*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_67:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_67]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.66* [[__CONTEXT]], %struct.anon.66** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.66*, %struct.anon.66** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* 
[[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__68 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, 
i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_67]], %struct.anon.67* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_67]], %struct.anon.67* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.67* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_67]], %struct.anon.67* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.67* +// CHECK-64-NEXT: store [[STRUCT_ANON_67]] [[TMP16]], %struct.anon.67* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.67*)* @__omp_outlined__68 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// 
CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__68 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.67* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.67*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6639,48 +7271,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.67* [[__CONTEXT]], %struct.anon.67** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.67*, %struct.anon.67** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_67:%.*]], %struct.anon.67* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_67]], %struct.anon.67* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to 
i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -6694,6 +7330,7 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155 // CHECK-64-SAME: () #[[ATTR0]] { // CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_68:%.*]], align 1 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -6703,7 +7340,7 @@ // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-64-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-64-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.68* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -6711,101 +7348,112 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__69 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.68* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.68*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_69:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_69]], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], 
align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.68* [[__CONTEXT]], %struct.anon.68** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.68*, %struct.anon.68** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: 
[[TMP11:%.*]] = inttoptr i64 [[TMP7]] to i8* -// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to i8* -// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__70 to i8*), i8* null, i8** [[TMP14]], i64 2) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK-64-NEXT: store i64 [[TMP10]], i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_69]], %struct.anon.69* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i64** [[TMP11]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_69]], %struct.anon.69* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i64* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i64** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.69* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_69]], %struct.anon.69* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.69* +// CHECK-64-NEXT: store [[STRUCT_ANON_69]] [[TMP16]], %struct.anon.69* [[TMP17]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.69*)* @__omp_outlined__70 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i64 16) // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], 9 +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-64: cond.true5: // CHECK-64-NEXT: br label [[COND_END7:%.*]] // CHECK-64: cond.false6: -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END7]] // CHECK-64: cond.end7: -// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP22]], [[COND_FALSE6]] ] +// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-64-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP23]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__70 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.69* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.69*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6815,48 +7463,52 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.69* [[__CONTEXT]], 
%struct.anon.69** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.69*, %struct.anon.69** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_69:%.*]], %struct.anon.69* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i64*, i64** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_69]], %struct.anon.69* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64*, i64** [[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK-64-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP255:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -6871,7 +7523,8 @@ // CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_70:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_70]], align 8 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -6879,11 +7532,17 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__71 to i8*), i8* null, i8** [[TMP4]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.70* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// 
CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_70]], %struct.anon.70* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.70* +// CHECK-64-NEXT: store [[STRUCT_ANON_70]] [[TMP5]], %struct.anon.70* [[TMP6]], align 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.70*)* @__omp_outlined__71 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -6891,10 +7550,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__71 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.70* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.70*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6904,80 +7564,89 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.70* [[__CONTEXT]], %struct.anon.70** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.70*, %struct.anon.70** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // 
CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_71:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_71]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__72 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.71* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_71]], %struct.anon.71* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.71* +// CHECK-64-NEXT: store [[STRUCT_ANON_71]] [[TMP5]], %struct.anon.71* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.71*)* @__omp_outlined__72 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -6985,10 +7654,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__72 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.71* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.71*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6998,63 +7668,72 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.71* [[__CONTEXT]], %struct.anon.71** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.71*, %struct.anon.71** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_72:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_72]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__73 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.72* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_72]], %struct.anon.72* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.72* +// CHECK-64-NEXT: store [[STRUCT_ANON_72]] [[TMP5]], %struct.anon.72* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.72*)* @__omp_outlined__73 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7062,10 +7741,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__73 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.72* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = 
alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.72*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7075,80 +7755,89 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.72* [[__CONTEXT]], %struct.anon.72** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.72*, %struct.anon.72** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: 
omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_73:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_73]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = 
icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__74 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.73* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_73]], %struct.anon.73* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.73* +// CHECK-64-NEXT: store [[STRUCT_ANON_73]] [[TMP5]], %struct.anon.73* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.73*)* @__omp_outlined__74 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7156,10 +7845,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__74 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.73* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.73*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7169,38 +7859,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.73* [[__CONTEXT]], %struct.anon.73** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.73*, %struct.anon.73** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 
1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -7214,14 +7906,21 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_74:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_74]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], 
i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__75 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.74* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_74]], %struct.anon.74* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.74* +// CHECK-64-NEXT: store [[STRUCT_ANON_74]] [[TMP5]], %struct.anon.74* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.74*)* @__omp_outlined__75 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7229,10 +7928,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__75 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.74* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.74*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7242,38 +7942,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.74* [[__CONTEXT]], %struct.anon.74** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.74*, %struct.anon.74** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void 
@__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -7287,14 +7989,21 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_75:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_75]], align 8 // CHECK-64-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__76 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.75* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_75]], %struct.anon.75* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.75* +// CHECK-64-NEXT: store [[STRUCT_ANON_75]] [[TMP5]], %struct.anon.75* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.75*)* @__omp_outlined__76 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7302,10 +8011,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__76 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.75* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.75*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7315,38 +8025,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.75* [[__CONTEXT]], %struct.anon.75** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.75*, %struct.anon.75** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load 
i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -7360,14 +8072,21 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_76:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = 
alloca [[STRUCT_ANON_76]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__77 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.76* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_76]], %struct.anon.76* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.76* +// CHECK-64-NEXT: store [[STRUCT_ANON_76]] [[TMP5]], %struct.anon.76* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.76*)* @__omp_outlined__77 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7375,10 +8094,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__77 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.76* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.76*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7388,38 +8108,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.76* [[__CONTEXT]], %struct.anon.76** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.76*, %struct.anon.76** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -7434,7 +8156,8 @@ // CHECK-64-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_77:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_77]], align 8 // CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -7442,11 +8165,17 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__78 to i8*), i8* null, i8** [[TMP4]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.77* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_77]], %struct.anon.77* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.77* +// CHECK-64-NEXT: store [[STRUCT_ANON_77]] [[TMP5]], %struct.anon.77* [[TMP6]], align 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK-64-NEXT: [[TMP8:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.77*)* @__omp_outlined__78 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7454,10 +8183,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__78 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.77* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.77*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7467,88 +8197,97 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.77* [[__CONTEXT]], %struct.anon.77** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.77*, %struct.anon.77** [[__CONTEXT_ADDR]], align 8 // 
CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: 
omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-64-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 // CHECK-64-SAME: () #[[ATTR8]] 
{ // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_78:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_78]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__79 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.78* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_78]], %struct.anon.78* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.78* +// CHECK-64-NEXT: store [[STRUCT_ANON_78]] [[TMP5]], %struct.anon.78* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.78*)* @__omp_outlined__79 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7556,10 +8295,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__79 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.78* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.78*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7569,71 +8309,80 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.78* [[__CONTEXT]], %struct.anon.78** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.78*, %struct.anon.78** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = 
load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // 
CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_79:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_79]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__80 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.79* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_79]], %struct.anon.79* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.79* +// CHECK-64-NEXT: store [[STRUCT_ANON_79]] [[TMP5]], %struct.anon.79* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.79*)* @__omp_outlined__80 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7641,10 +8390,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__80 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* 
noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.79* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.79*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7654,88 +8404,97 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.79* [[__CONTEXT]], %struct.anon.79** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.79*, %struct.anon.79** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 
[[TMP1]]) -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-64-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_80:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_80]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__81 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.80* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_80]], %struct.anon.80* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.80* +// CHECK-64-NEXT: store [[STRUCT_ANON_80]] [[TMP5]], %struct.anon.80* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.80*)* @__omp_outlined__81 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7743,10 +8502,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__81 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.80* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca 
i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.80*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7756,38 +8516,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.80* [[__CONTEXT]], %struct.anon.80** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.80*, %struct.anon.80** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -7795,28 +8557,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_81:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_81]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__82 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.81* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_81]], %struct.anon.81* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.81* +// CHECK-64-NEXT: store 
[[STRUCT_ANON_81]] [[TMP5]], %struct.anon.81* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.81*)* @__omp_outlined__82 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7824,10 +8593,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__82 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.81* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.81*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7837,38 +8607,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.81* [[__CONTEXT]], %struct.anon.81** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.81*, %struct.anon.81** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* 
[[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -7876,28 +8648,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_82:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_82]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 
[[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__83 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.82* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_82]], %struct.anon.82* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.82* +// CHECK-64-NEXT: store [[STRUCT_ANON_82]] [[TMP5]], %struct.anon.82* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.82*)* @__omp_outlined__83 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7905,10 +8684,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__83 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.82* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.82*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7918,38 +8698,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.82* [[__CONTEXT]], %struct.anon.82** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.82*, %struct.anon.82** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, 
i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -7957,28 +8739,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 
[[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_83:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_83]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__84 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.83* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_83]], %struct.anon.83* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.83* +// CHECK-64-NEXT: store [[STRUCT_ANON_83]] [[TMP5]], %struct.anon.83* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.83*)* @__omp_outlined__84 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -7986,10 +8775,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__84 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.83* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.83*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7999,38 +8789,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 
+// CHECK-64-NEXT: store %struct.anon.83* [[__CONTEXT]], %struct.anon.83** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.83*, %struct.anon.83** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP288]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -8038,28 +8830,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_84:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_84]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__85 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.84* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_84]], %struct.anon.84* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.84* +// CHECK-64-NEXT: store [[STRUCT_ANON_84]] [[TMP5]], %struct.anon.84* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.84*)* @__omp_outlined__85 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8067,10 +8866,11 @@ // // 
// CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__85 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.84* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.84*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8080,68 +8880,77 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.84* [[__CONTEXT]], %struct.anon.84** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.84*, %struct.anon.84** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 65, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-64-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-64-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-64-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP291]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_85:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_85]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// 
CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__86 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.85* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_85]], %struct.anon.85* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.85* +// CHECK-64-NEXT: store [[STRUCT_ANON_85]] [[TMP5]], %struct.anon.85* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.85*)* @__omp_outlined__86 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8149,10 +8958,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__86 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.85* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.85*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8162,71 +8972,80 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.85* [[__CONTEXT]], %struct.anon.85** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.85*, %struct.anon.85** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* 
[[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_86:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_86]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__87 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.86* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_86]], %struct.anon.86* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.86* +// CHECK-64-NEXT: store [[STRUCT_ANON_86]] [[TMP5]], %struct.anon.86* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.86*)* @__omp_outlined__87 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8234,10 +9053,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__87 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.86* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.86*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8247,88 
+9067,97 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.86* [[__CONTEXT]], %struct.anon.86** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.86*, %struct.anon.86** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-64-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-64-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 
10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_87:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_87]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__88 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.87* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_87]], %struct.anon.87* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.87* +// CHECK-64-NEXT: store [[STRUCT_ANON_87]] [[TMP5]], %struct.anon.87* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.87*)* @__omp_outlined__88 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8336,10 +9165,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__88 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.87* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.87*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8349,38 +9179,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.87* [[__CONTEXT]], %struct.anon.87** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] 
= load %struct.anon.87*, %struct.anon.87** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -8388,28 +9220,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_88:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_88]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__89 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.88* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_88]], %struct.anon.88* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.88* +// CHECK-64-NEXT: store [[STRUCT_ANON_88]] [[TMP5]], %struct.anon.88* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.88*)* @__omp_outlined__89 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8417,10 +9256,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__89 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias 
noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.88* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.88*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8430,38 +9270,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.88* [[__CONTEXT]], %struct.anon.88** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.88*, %struct.anon.88** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp 
sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -8469,28 +9311,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_89:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_89]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__90 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.89* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = 
call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_89]], %struct.anon.89* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.89* +// CHECK-64-NEXT: store [[STRUCT_ANON_89]] [[TMP5]], %struct.anon.89* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.89*)* @__omp_outlined__90 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8498,10 +9347,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__90 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.89* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.89*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8511,38 +9361,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.89* [[__CONTEXT]], %struct.anon.89** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.89*, %struct.anon.89** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], 
label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -8550,28 +9402,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], 
align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_90:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_90]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__91 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.90* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_90]], %struct.anon.90* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.90* +// CHECK-64-NEXT: store [[STRUCT_ANON_90]] [[TMP5]], %struct.anon.90* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.90*)* @__omp_outlined__91 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8579,10 +9438,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__91 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.90* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.90*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8592,38 +9452,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.90* [[__CONTEXT]], %struct.anon.90** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.90*, %struct.anon.90** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void 
@__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -8631,28 +9493,35 @@ // CHECK-64: omp.dispatch.inc: // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_91:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_91]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__92 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.91* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_91]], %struct.anon.91* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.91* +// CHECK-64-NEXT: store [[STRUCT_ANON_91]] [[TMP5]], %struct.anon.91* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.91*)* @__omp_outlined__92 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8660,10 +9529,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__92 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.91* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.91*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = 
alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8673,80 +9543,89 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.91* [[__CONTEXT]], %struct.anon.91** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.91*, %struct.anon.91** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 -// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_92:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_92]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 
// CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__93 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.92* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_92]], %struct.anon.92* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.92* +// CHECK-64-NEXT: store [[STRUCT_ANON_92]] [[TMP5]], %struct.anon.92* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.92*)* @__omp_outlined__93 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8754,10 +9633,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__93 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.92* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.92*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8767,63 +9647,72 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.92* [[__CONTEXT]], %struct.anon.92** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.92*, %struct.anon.92** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void 
@__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-64: omp.loop.exit: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_93:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_93]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call 
i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__94 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.93* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_93]], %struct.anon.93* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.93* +// CHECK-64-NEXT: store [[STRUCT_ANON_93]] [[TMP5]], %struct.anon.93* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.93*)* @__omp_outlined__94 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8831,10 +9720,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__94 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.93* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.93*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8844,80 +9734,89 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.93* [[__CONTEXT]], %struct.anon.93** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.93*, %struct.anon.93** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // 
CHECK-64-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-64-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-64-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-64-NEXT: ret void // // // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_94:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_94]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__95 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.94* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_94]], %struct.anon.94* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.94* +// CHECK-64-NEXT: store [[STRUCT_ANON_94]] [[TMP5]], %struct.anon.94* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.94*)* @__omp_outlined__95 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // 
CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8925,10 +9824,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__95 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.94* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.94*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8938,38 +9838,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.94* [[__CONTEXT]], %struct.anon.94** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.94*, %struct.anon.94** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -8983,14 +9885,21 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_95:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_95]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__96 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.95* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_95]], %struct.anon.95* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.95* +// CHECK-64-NEXT: store [[STRUCT_ANON_95]] [[TMP5]], %struct.anon.95* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.95*)* @__omp_outlined__96 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -8998,10 +9907,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__96 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.95* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.95*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9011,38 +9921,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.95* [[__CONTEXT]], %struct.anon.95** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.95*, %struct.anon.95** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 
[[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -9056,14 +9968,21 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_96:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_96]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__97 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.96* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_96]], %struct.anon.96* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.96* +// CHECK-64-NEXT: store [[STRUCT_ANON_96]] [[TMP5]], %struct.anon.96* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.96*)* @__omp_outlined__97 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void 
@__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -9071,10 +9990,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__97 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.96* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.96*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9084,38 +10004,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.96* [[__CONTEXT]], %struct.anon.96** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.96*, %struct.anon.96** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP318]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -9129,14 +10051,21 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 // CHECK-64-SAME: () #[[ATTR8]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_97:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_97]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__98 to i8*), i8* null, i8** [[TMP2]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.97* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_97]], %struct.anon.97* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.97* +// CHECK-64-NEXT: store [[STRUCT_ANON_97]] [[TMP5]], %struct.anon.97* [[TMP6]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.97*)* 
@__omp_outlined__98 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -9144,10 +10073,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__98 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.97* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.97*, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9157,38 +10087,40 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store %struct.anon.97* [[__CONTEXT]], %struct.anon.97** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.97*, %struct.anon.97** [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] -// 
CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-64-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] // CHECK-64: omp.inner.for.end: @@ -9203,7 +10135,7 @@ // CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -9213,15 +10145,14 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[TMP2]], align 1 // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], 
i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-32-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -9229,175 +10160,191 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_16:%.*]] = alloca [[STRUCT_ANON_0]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG19:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // 
CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK-32-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32: omp_if.then: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP9]], 10 +// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1, 
!llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK-32-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK-32-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK-32-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL4]] to i32 -// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP19]], i32 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK-32-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK-32-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP16]], align 4, !llvm.access.group [[ACC_GRP130]] +// 
CHECK-32-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP19:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP18]], i8* [[TMP19]]), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP20]] to %struct.anon.0* +// CHECK-32-NEXT: store [[STRUCT_ANON_0]] [[TMP21]], %struct.anon.0* [[TMP22]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK-32-NEXT: [[TMP24:%.*]] = zext i1 [[TOBOOL5]] to i32 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 [[TMP24]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP20]]), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP19]], i32 12), !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP26]], 9 -// CHECK-32-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] -// CHECK-32: cond.true8: -// CHECK-32-NEXT: br label [[COND_END10:%.*]] -// CHECK-32: cond.false9: -// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: br label [[COND_END10]] -// CHECK-32: cond.end10: -// CHECK-32-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP27]], [[COND_FALSE9]] ] -// CHECK-32-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK-32-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP31]], 9 +// CHECK-32-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK-32: cond.true9: +// CHECK-32-NEXT: br label [[COND_END11:%.*]] +// CHECK-32: cond.false10: +// CHECK-32-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: br label [[COND_END11]] +// CHECK-32: cond.end11: +// CHECK-32-NEXT: [[COND12:%.*]] = phi i32 [ 9, [[COND_TRUE9]] ], [ [[TMP32]], [[COND_FALSE10]] ] +// CHECK-32-NEXT: store i32 [[COND12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32: omp_if.else: -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] -// CHECK-32: omp.inner.for.cond12: -// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP29]], 10 -// CHECK-32-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK-32: omp.inner.for.body14: -// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP32:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP32]] to i1 -// CHECK-32-NEXT: [[CONV17:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED16]] to i8* -// CHECK-32-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 -// CHECK-32-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 -// CHECK-32-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED16]], align 4 -// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to i8* -// CHECK-32-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 -// 
CHECK-32-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to i8* -// CHECK-32-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 -// CHECK-32-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to i8* -// CHECK-32-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK-32-NEXT: [[TMP40:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP40]] to i1 -// CHECK-32-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL20]] to i32 -// CHECK-32-NEXT: [[TMP42:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP42]], i32 3) +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] +// CHECK-32: omp.inner.for.cond13: +// CHECK-32-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP14:%.*]] = icmp slt i32 [[TMP34]], 10 +// CHECK-32-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END30:%.*]] +// CHECK-32: omp.inner.for.body15: +// CHECK-32-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP35]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP36]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP38]], i32* [[TMP37]], align 4 +// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP40]], i32* [[TMP39]], align 4 +// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP42:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-NEXT: [[TOBOOL17:%.*]] = trunc i8 [[TMP42]] to i1 +// CHECK-32-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL17]] to i8 +// CHECK-32-NEXT: store i8 [[FROMBOOL18]], i8* [[TMP41]], align 4 +// CHECK-32-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG19]] to i8* +// CHECK-32-NEXT: [[TMP44:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12) +// CHECK-32-NEXT: [[TMP45:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP43]], i8* [[TMP44]]) +// CHECK-32-NEXT: [[TMP46:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], align 1 +// CHECK-32-NEXT: [[TMP47:%.*]] = bitcast i8* [[TMP45]] to %struct.anon.0* +// CHECK-32-NEXT: store [[STRUCT_ANON_0]] [[TMP46]], %struct.anon.0* [[TMP47]], align 1 +// CHECK-32-NEXT: [[TMP48:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP48]] to i1 +// CHECK-32-NEXT: [[TMP49:%.*]] = zext i1 [[TOBOOL20]] to i32 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 [[TMP49]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__2 to i8*), i8* null, i8* 
[[TMP45]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP44]], i32 12) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] // CHECK-32: omp.inner.for.inc21: -// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK-32-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP50]], [[TMP51]] // CHECK-32-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK-32-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP52]], [[TMP53]] // CHECK-32-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK-32-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP54]], [[TMP55]] // CHECK-32-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP49]], 9 +// CHECK-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP56]], 9 // CHECK-32-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] // CHECK-32: cond.true26: // CHECK-32-NEXT: br label [[COND_END28:%.*]] // CHECK-32: cond.false27: -// CHECK-32-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END28]] // CHECK-32: cond.end28: -// CHECK-32-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP50]], [[COND_FALSE27]] ] +// CHECK-32-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ] // CHECK-32-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP51]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] +// CHECK-32-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP58]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP134:![0-9]+]] // CHECK-32: omp.inner.for.end30: // CHECK-32-NEXT: br label [[OMP_IF_END]] // CHECK-32: omp_if.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 
-// CHECK-32-NEXT: br i1 [[TMP53]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK-32-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0 +// CHECK-32-NEXT: br i1 [[TMP60]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9406,13 +10353,14 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9422,86 +10370,95 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 4 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// 
CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-32-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32: omp_if.then: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32: omp_if.else: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] -// CHECK-32: omp.inner.for.cond2: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] -// CHECK-32: omp.inner.for.body4: -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 -// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 -// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] -// CHECK-32: omp.body.continue7: -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] -// CHECK-32: omp.inner.for.inc8: -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP139:![0-9]+]] -// CHECK-32: omp.inner.for.end10: +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store 
i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] +// CHECK-32: omp.inner.for.cond3: +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP21]], [[TMP22]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END11:%.*]] +// CHECK-32: omp.inner.for.body5: +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP23]], 1 +// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK-32-NEXT: store i32 [[ADD7]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE8:%.*]] +// CHECK-32: omp.body.continue8: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC9:%.*]] +// CHECK-32: omp.inner.for.inc9: +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK-32-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP139:![0-9]+]] +// CHECK-32: omp.inner.for.end11: // CHECK-32-NEXT: br label [[OMP_IF_END]] // CHECK-32: omp_if.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-32-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9510,13 +10467,14 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// 
CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9526,86 +10484,95 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 4 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-32-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-32-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32: omp_if.then: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load 
i32, i32* [[TMP3]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32: omp_if.else: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], 
i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] -// CHECK-32: omp.inner.for.cond2: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] -// CHECK-32: omp.inner.for.body4: -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 -// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 -// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] -// CHECK-32: omp.body.continue7: -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] -// CHECK-32: omp.inner.for.inc8: -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP143:![0-9]+]] -// CHECK-32: omp.inner.for.end10: +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] +// CHECK-32: omp.inner.for.cond3: +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP21]], [[TMP22]] +// CHECK-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END11:%.*]] +// CHECK-32: omp.inner.for.body5: +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP23]], 1 +// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK-32-NEXT: store i32 [[ADD7]], i32* [[I]], align 4 +// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE8:%.*]] +// CHECK-32: omp.body.continue8: +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC9:%.*]] +// CHECK-32: omp.inner.for.inc9: +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK-32-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP143:![0-9]+]] +// CHECK-32: omp.inner.for.end11: // CHECK-32-NEXT: br label [[OMP_IF_END]] // CHECK-32: omp_if.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-32-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9616,6 +10583,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -9625,7 +10593,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -9633,92 +10601,106 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.2* +// CHECK-32-NEXT: store [[STRUCT_ANON_2]] [[TMP16]], %struct.anon.2* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__4 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// 
CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9727,12 +10709,13 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9742,58 +10725,64 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// 
CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 
[[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9804,6 +10793,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -9813,7 +10803,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -9821,92 +10811,106 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__6 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.4* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.4* +// CHECK-32-NEXT: store [[STRUCT_ANON_4]] [[TMP16]], %struct.anon.4* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, 
i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.4*)* @__omp_outlined__6 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: [[TMP21:%.*]] = 
load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9915,12 +10919,13 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9930,48 +10935,54 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* 
[[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9982,6 +10993,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -9991,7 +11003,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -9999,92 +11011,106 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = 
alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP156:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__8 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.6* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.6* +// CHECK-32-NEXT: store [[STRUCT_ANON_6]] [[TMP16]], %struct.anon.6* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, 
%struct.anon.6*)* @__omp_outlined__8 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, 
!llvm.access.group [[ACC_GRP156]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10093,12 +11119,13 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10108,46 +11135,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 
[[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -10155,9 +11188,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10168,6 +11201,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -10177,7 +11211,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -10185,92 +11219,106 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef 
[[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], 
i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.8* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP162]] +// 
CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.8* +// CHECK-32-NEXT: store [[STRUCT_ANON_8]] [[TMP16]], %struct.anon.8* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.8*)* @__omp_outlined__10 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 
4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10279,12 +11327,13 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10294,46 +11343,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: 
[[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP165:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -10341,9 +11396,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10354,6 +11409,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -10363,7 +11419,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], 
%struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -10371,92 +11427,106 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_10]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br 
label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, 
!llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.10* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.10* +// CHECK-32-NEXT: store [[STRUCT_ANON_10]] [[TMP16]], %struct.anon.10* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.10*)* @__omp_outlined__12 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10465,12 +11535,13 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__12 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10480,46 +11551,52 @@ // CHECK-32-NEXT: [[I:%.*]] = 
alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], 
label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -10527,9 +11604,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10540,6 +11617,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* 
@[[GLOB1]], i8 2, i1 false) @@ -10549,7 +11627,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -10557,92 +11635,106 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__13 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_12]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// 
CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__14 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.12* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.12* +// CHECK-32-NEXT: store [[STRUCT_ANON_12]] [[TMP16]], %struct.anon.12* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.12*)* @__omp_outlined__14 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10651,12 +11743,13 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__14 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10666,46 +11759,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// 
CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -10713,9 +11812,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: 
.omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10727,7 +11826,7 @@ // CHECK-32-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -10736,12 +11835,12 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -10749,23 +11848,30 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__15 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_14]], align 8 // CHECK-32-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK-32-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK-32-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10773,82 +11879,87 @@ // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to i8* -// CHECK-32-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__16 to i8*), i8* null, i8** [[TMP16]], i32 3) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-NEXT: store i32 [[TMP16]], i32* [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast %struct.anon.14* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP18:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12) +// CHECK-32-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP17]], i8* [[TMP18]]) +// CHECK-32-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP19]] to %struct.anon.14* +// CHECK-32-NEXT: store [[STRUCT_ANON_14]] [[TMP20]], %struct.anon.14* [[TMP21]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.14*)* @__omp_outlined__16 to i8*), i8* null, i8* [[TMP19]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP18]], i32 12) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP23]], 9 +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP28]], 9 // CHECK-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK-32: cond.true6: // CHECK-32-NEXT: br label [[COND_END8:%.*]] // CHECK-32: cond.false7: -// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END8]] // CHECK-32: cond.end8: -// CHECK-32-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP24]], [[COND_FALSE7]] ] +// CHECK-32-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP29]], [[COND_FALSE7]] ] // CHECK-32-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// 
CHECK-32-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32: .omp.lastprivate.then: -// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-NEXT: store i32 [[TMP28]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-NEXT: store i32 [[TMP33]], i32* [[A]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32: .omp.lastprivate.done: // CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) @@ -10856,13 +11967,14 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__16 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10873,54 +11985,62 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[A1]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-32-NEXT: store i32 [[TMP15]], i32* [[A1]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // 
CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32: .omp.lastprivate.then: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-NEXT: store i32 [[TMP13]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP20]], i32* [[A]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32: .omp.lastprivate.done: // CHECK-32-NEXT: ret void @@ -10929,6 +12049,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -10938,7 +12059,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -10946,99 +12067,114 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__17 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, 
align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_16]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__18 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.16* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.16* +// CHECK-32-NEXT: store [[STRUCT_ANON_16]] [[TMP16]], %struct.anon.16* [[TMP17]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.16*)* @__omp_outlined__18 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 
[[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__18 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11048,61 +12184,68 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: 
[[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11112,7 +12255,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -11120,99 +12263,114 @@ // // // CHECK-32-LABEL: define 
{{[^@]+}}@__omp_outlined__19 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_18]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // 
CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__20 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.18* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.18* +// CHECK-32-NEXT: store [[STRUCT_ANON_18]] [[TMP16]], %struct.anon.18* [[TMP17]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, 
i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.18*)* @__omp_outlined__20 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__20 -// CHECK-32-SAME: (i32* noalias noundef 
[[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11222,51 +12380,58 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* 
[[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11276,7 +12441,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* 
[[DOTZERO_ADDR]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -11284,99 +12449,114 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__21 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.19* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.19*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_20]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.19* [[__CONTEXT]], %struct.anon.19** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: 
cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__22 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.20* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_20]], %struct.anon.20* 
[[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.20* +// CHECK-32-NEXT: store [[STRUCT_ANON_20]] [[TMP16]], %struct.anon.20* [[TMP17]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.20*)* @__omp_outlined__22 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void 
@__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__22 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11386,46 +12566,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load 
i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -11439,6 +12625,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11448,7 +12635,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -11456,99 +12643,114 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__23 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_22]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, 
i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__24 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// 
CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.22* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.22* +// CHECK-32-NEXT: store [[STRUCT_ANON_22]] [[TMP16]], %struct.anon.22* [[TMP17]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.22*)* @__omp_outlined__24 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label 
[[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__24 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11558,46 +12760,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -11611,6 +12819,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11620,7 +12829,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -11628,99 +12837,114 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__25 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, 
align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_24]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__26 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.24* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.24* +// CHECK-32-NEXT: store [[STRUCT_ANON_24]] [[TMP16]], %struct.anon.24* [[TMP17]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.24*)* @__omp_outlined__26 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], 
align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__26 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11730,46 +12954,52 @@ // CHECK-32-NEXT: [[I:%.*]] = 
alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], 
label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -11783,6 +13013,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11792,7 +13023,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -11800,99 +13031,114 @@ // // // CHECK-32-LABEL: define 
{{[^@]+}}@__omp_outlined__27 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_26]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // 
CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__28 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.26* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.26* +// CHECK-32-NEXT: store [[STRUCT_ANON_26]] [[TMP16]], %struct.anon.26* [[TMP17]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, 
i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.26*)* @__omp_outlined__28 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__28 -// CHECK-32-SAME: (i32* noalias noundef 
[[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.26* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.26*, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11902,46 +13148,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.26* [[__CONTEXT]], %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], %struct.anon.26* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load 
i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -11955,6 +13207,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58 // 
CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11964,7 +13217,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -11972,10 +13225,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__29 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.27* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.27*, align 4 // CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -11983,82 +13237,93 @@ // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_28]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.27* [[__CONTEXT]], %struct.anon.27** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.27*, %struct.anon.27** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* 
[[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__30 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.28* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.28* +// CHECK-32-NEXT: store [[STRUCT_ANON_28]] [[TMP14]], %struct.anon.28* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.28*)* @__omp_outlined__30 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// 
CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12067,12 +13332,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__30 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.28* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = 
alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.28*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12082,48 +13346,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.28* [[__CONTEXT]], %struct.anon.28** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], %struct.anon.28* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12134,6 +13402,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_29:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -12143,7 +13412,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 
// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.29* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -12151,10 +13420,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__31 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.29* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.29*, align 4 // CHECK-32-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -12162,84 +13432,95 @@ // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_30:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_30]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* -// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) +// CHECK-32-NEXT: store %struct.anon.29* [[__CONTEXT]], %struct.anon.29** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.29*, %struct.anon.29** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [3 x i32]* [[B]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__32 to i8*), i8* null, i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// 
CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_30]], %struct.anon.30* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_30]], %struct.anon.30* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.30* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP13:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP12]], i8* [[TMP13]]), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load [[STRUCT_ANON_30]], %struct.anon.30* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP14]] to %struct.anon.30* +// CHECK-32-NEXT: store [[STRUCT_ANON_30]] [[TMP15]], %struct.anon.30* [[TMP16]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.30*)* @__omp_outlined__32 to i8*), i8* null, i8* [[TMP14]]), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP13]], i32 8), !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// 
CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP23]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP24]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK-32-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12248,12 +13529,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__32 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.30* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.30*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12263,58 +13543,62 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.30* [[__CONTEXT]], %struct.anon.30** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.30*, %struct.anon.30** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_30:%.*]], %struct.anon.30* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_30]], %struct.anon.30* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br 
label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12325,6 +13609,7 @@ // CHECK-32-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -12334,7 +13619,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.31* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -12342,10 +13627,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__33 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.31* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.31*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -12353,9 +13639,16 @@ // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_32:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_32]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.31* [[__CONTEXT]], %struct.anon.31** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.31*, %struct.anon.31** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK:%.*]] = bitcast i8* [[DOTCAPTURED_OMP_PREVIOUS_LB]] to i32* +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK:%.*]] = bitcast i8* [[DOTCAPTURED_OMP_PREVIOUS_UB]] to i32* // CHECK-32-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK-32-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** // CHECK-32-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) @@ -12364,53 +13657,59 @@ // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // 
CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-NEXT: store i8* [[TMP12]], 
i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_32]], %struct.anon.32* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_32]], %struct.anon.32* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK]], i32** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.32* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP13:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP12]], i8* [[TMP13]]), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load [[STRUCT_ANON_32]], %struct.anon.32* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP14]] to %struct.anon.32* +// CHECK-32-NEXT: store [[STRUCT_ANON_32]] [[TMP15]], %struct.anon.32* [[TMP16]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.32*)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8* [[TMP14]]), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP13]], i32 8), !llvm.access.group [[ACC_GRP204]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: 
call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK-32-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12418,16 +13717,17 @@ // CHECK-32-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 4 // CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[B]], i32 4) // CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 4) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32 4) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32 4) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__34 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.32* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.32*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12437,48 +13737,52 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.32* [[__CONTEXT]], %struct.anon.32** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.32*, %struct.anon.32** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_32:%.*]], %struct.anon.32* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_32]], %struct.anon.32* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], 
i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12492,25 +13796,22 @@ // CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK-32-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK-32-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.32** +// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.32** [[TMP3]] to %struct.anon.32* +// CHECK-32-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.32* [[TMP4]]) #[[ATTR2]] // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_33:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -12520,7 +13821,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void 
@__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.33* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -12528,92 +13829,104 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__35 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.33* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.33*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_34]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.33* [[__CONTEXT]], %struct.anon.33** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.33*, %struct.anon.33** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 
4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__36 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], %struct.anon.34* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], %struct.anon.34* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// 
CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.34* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_34]], %struct.anon.34* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.34* +// CHECK-32-NEXT: store [[STRUCT_ANON_34]] [[TMP14]], %struct.anon.34* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.34*)* @__omp_outlined__36 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12622,12 +13935,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__36 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.34* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.34*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12637,46 +13949,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], 
i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.34* [[__CONTEXT]], %struct.anon.34** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.34*, %struct.anon.34** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_34:%.*]], %struct.anon.34* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], %struct.anon.34* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -12684,9 +14000,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12697,6 +14013,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_35:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -12706,7 +14023,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* 
[[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.35* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -12714,92 +14031,104 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__37 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.35* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.35*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_36:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_36]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.35* [[__CONTEXT]], %struct.anon.35** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.35*, %struct.anon.35** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__38 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_36]], %struct.anon.36* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_36]], %struct.anon.36* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.36* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_36]], %struct.anon.36* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.36* +// CHECK-32-NEXT: store [[STRUCT_ANON_36]] [[TMP14]], %struct.anon.36* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.36*)* @__omp_outlined__38 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12808,12 +14137,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__38 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.36* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.36*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12823,46 +14151,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], 
align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.36* [[__CONTEXT]], %struct.anon.36** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.36*, %struct.anon.36** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_36:%.*]], %struct.anon.36* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_36]], %struct.anon.36* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -12870,9 +14202,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12883,6 +14215,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -12892,7 +14225,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// 
CHECK-32-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.37* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -12900,92 +14233,104 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__39 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.37* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.37*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_38:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_38]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.37* [[__CONTEXT]], %struct.anon.37** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.37*, %struct.anon.37** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__40 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_38]], %struct.anon.38* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_38]], %struct.anon.38* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, 
!llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.38* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_38]], %struct.anon.38* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.38* +// CHECK-32-NEXT: store [[STRUCT_ANON_38]] [[TMP14]], %struct.anon.38* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.38*)* @__omp_outlined__40 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp 
sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12994,12 +14339,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__40 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.38* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.38*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13009,46 +14353,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* 
[[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.38* [[__CONTEXT]], %struct.anon.38** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.38*, %struct.anon.38** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_38:%.*]], %struct.anon.38* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_38]], %struct.anon.38* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -13056,9 +14404,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13069,6 +14417,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_39:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -13078,7 +14427,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__41(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.39* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -13086,92 +14435,104 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__41 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.39* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.39*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_40:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_40]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.39* [[__CONTEXT]], %struct.anon.39** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.39*, %struct.anon.39** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__42 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_40]], %struct.anon.40* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_40]], %struct.anon.40* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] +// 
CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.40* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_40]], %struct.anon.40* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.40* +// CHECK-32-NEXT: store [[STRUCT_ANON_40]] [[TMP14]], %struct.anon.40* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.40*)* @__omp_outlined__42 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: 
br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13180,12 +14541,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__42 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.40* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.40*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13195,46 +14555,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.40* [[__CONTEXT]], %struct.anon.40** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.40*, %struct.anon.40** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_40:%.*]], %struct.anon.40* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_40]], %struct.anon.40* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP231:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -13242,9 +14606,9 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13255,6 +14619,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_41:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -13264,7 +14629,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], 
%struct.anon.41* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -13272,99 +14637,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__43 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.41* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.41*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_42:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_42]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.41* [[__CONTEXT]], %struct.anon.41** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.41*, %struct.anon.41** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // 
CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__44 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_42]], %struct.anon.42* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_42]], %struct.anon.42* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.42* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_42]], %struct.anon.42* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.42* +// CHECK-32-NEXT: store [[STRUCT_ANON_42]] [[TMP14]], 
%struct.anon.42* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.42*)* @__omp_outlined__44 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // 
CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__44 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.42* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.42*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13374,51 +14750,56 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.42* [[__CONTEXT]], %struct.anon.42** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.42*, %struct.anon.42** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_42:%.*]], %struct.anon.42* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_42]], %struct.anon.42* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_43:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -13428,7 +14809,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.43* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -13436,99 +14817,110 @@ // // // CHECK-32-LABEL: define 
{{[^@]+}}@__omp_outlined__45 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.43* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.43*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_44:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_44]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.43* [[__CONTEXT]], %struct.anon.43** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.43*, %struct.anon.43** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // 
CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__46 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_44]], %struct.anon.44* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_44]], %struct.anon.44* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.44* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_44]], %struct.anon.44* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.44* +// CHECK-32-NEXT: store [[STRUCT_ANON_44]] [[TMP14]], %struct.anon.44* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.44*)* @__omp_outlined__46 to i8*), i8* null, i8* [[TMP13]]) +// 
CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__46 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) 
#[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.44* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.44*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13538,61 +14930,66 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.44* [[__CONTEXT]], %struct.anon.44** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.44*, %struct.anon.44** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_44:%.*]], %struct.anon.44* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_44]], %struct.anon.44* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-NEXT: 
br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_45:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -13602,7 +14999,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// 
CHECK-32-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.45* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -13610,99 +15007,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__47 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.45* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.45*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_46:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_46]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.45* [[__CONTEXT]], %struct.anon.45** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.45*, %struct.anon.45** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__48 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_46]], %struct.anon.46* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_46]], %struct.anon.46* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.46* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load 
[[STRUCT_ANON_46]], %struct.anon.46* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.46* +// CHECK-32-NEXT: store [[STRUCT_ANON_46]] [[TMP14]], %struct.anon.46* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.46*)* @__omp_outlined__48 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// 
CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__48 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.46* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.46*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13712,51 +15120,56 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.46* [[__CONTEXT]], %struct.anon.46** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.46*, %struct.anon.46** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_46:%.*]], %struct.anon.46* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_46]], %struct.anon.46* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: 
[[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_47:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -13766,7 +15179,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.47* [[OMP_OUTLINED_ARG_AGG_]]) 
#[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -13774,99 +15187,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__49 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.47* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.47*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_48:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_48]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.47* [[__CONTEXT]], %struct.anon.47** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.47*, %struct.anon.47** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: 
cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__50 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_48]], %struct.anon.48* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_48]], %struct.anon.48* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.48* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_48]], %struct.anon.48* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.48* +// CHECK-32-NEXT: store [[STRUCT_ANON_48]] [[TMP14]], %struct.anon.48* [[TMP15]], align 4 +// CHECK-32-NEXT: call 
void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.48*)* @__omp_outlined__50 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define 
{{[^@]+}}@__omp_outlined__50 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.48* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.48*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13876,46 +15300,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.48* [[__CONTEXT]], %struct.anon.48** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.48*, %struct.anon.48** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_48:%.*]], %struct.anon.48* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_48]], %struct.anon.48* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void 
@__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -13929,6 +15357,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_49:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -13938,7 +15367,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.49* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -13946,99 +15375,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__51 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.49* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.49*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_50:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_50]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.49* [[__CONTEXT]], %struct.anon.49** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.49*, %struct.anon.49** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__52 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_50]], %struct.anon.50* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_50]], %struct.anon.50* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.50* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_50]], %struct.anon.50* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.50* +// CHECK-32-NEXT: store [[STRUCT_ANON_50]] [[TMP14]], %struct.anon.50* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.50*)* @__omp_outlined__52 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__52 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.50* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.50*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14048,46 +15488,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.50* [[__CONTEXT]], %struct.anon.50** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.50*, %struct.anon.50** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_50:%.*]], %struct.anon.50* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_50]], %struct.anon.50* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// 
CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // 
CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -14101,6 +15545,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_51:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -14110,7 +15555,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.51* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -14118,99 +15563,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__53 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.51* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.51*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_52:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_52]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.51* [[__CONTEXT]], %struct.anon.51** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.51*, %struct.anon.51** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// 
CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__54 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_52]], %struct.anon.52* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_52]], %struct.anon.52* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.52* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_52]], %struct.anon.52* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.52* +// CHECK-32-NEXT: store [[STRUCT_ANON_52]] [[TMP14]], %struct.anon.52* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.52*)* @__omp_outlined__54 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: 
cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__54 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.52* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.52*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14220,46 +15676,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.52* [[__CONTEXT]], %struct.anon.52** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.52*, %struct.anon.52** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_52:%.*]], %struct.anon.52* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_52]], %struct.anon.52* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 
[[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -14273,6 +15733,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_53:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -14282,7 +15743,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.53* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -14290,99 +15751,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__55 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.53* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.53*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_54:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_54]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.53* [[__CONTEXT]], %struct.anon.53** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.53*, %struct.anon.53** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__56 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_54]], %struct.anon.54* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_54]], %struct.anon.54* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.54* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_54]], %struct.anon.54* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.54* +// CHECK-32-NEXT: store [[STRUCT_ANON_54]] [[TMP14]], %struct.anon.54* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.54*)* @__omp_outlined__56 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], 
align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__56 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.54* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.54*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14392,46 +15864,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.54* [[__CONTEXT]], %struct.anon.54** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.54*, %struct.anon.54** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_54:%.*]], %struct.anon.54* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, 
i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_54]], %struct.anon.54* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// 
CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -14445,6 +15921,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_55:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -14454,7 +15931,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.55* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -14462,99 +15939,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__57 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.55* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.55*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_56:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_56]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.55* [[__CONTEXT]], %struct.anon.55** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.55*, %struct.anon.55** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 
[[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__58 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_56]], %struct.anon.56* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_56]], %struct.anon.56* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.56* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_56]], %struct.anon.56* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.56* +// CHECK-32-NEXT: store [[STRUCT_ANON_56]] [[TMP14]], %struct.anon.56* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.56*)* @__omp_outlined__58 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__58 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.56* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.56*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14564,51 +16052,56 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.56* [[__CONTEXT]], %struct.anon.56** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.56*, %struct.anon.56** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_56:%.*]], %struct.anon.56* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_56]], %struct.anon.56* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 
0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_57:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -14618,7 +16111,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.57* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -14626,99 +16119,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__59 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.57* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.57*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_58:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_58]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.57* [[__CONTEXT]], %struct.anon.57** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.57*, %struct.anon.57** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__60 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_58]], %struct.anon.58* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_58]], %struct.anon.58* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.58* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_58]], %struct.anon.58* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.58* +// CHECK-32-NEXT: store [[STRUCT_ANON_58]] [[TMP14]], %struct.anon.58* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.58*)* @__omp_outlined__60 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__60 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.58* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.58*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14728,61 +16232,66 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.58* 
[[__CONTEXT]], %struct.anon.58** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.58*, %struct.anon.58** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_58:%.*]], %struct.anon.58* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_58]], %struct.anon.58* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// 
CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_59:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -14792,7 +16301,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.59* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -14800,99 +16309,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__61 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.59* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.59*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // 
CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_60:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_60]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.59* [[__CONTEXT]], %struct.anon.59** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.59*, %struct.anon.59** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__62 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_60]], %struct.anon.60* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_60]], %struct.anon.60* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.60* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_60]], %struct.anon.60* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.60* +// CHECK-32-NEXT: store [[STRUCT_ANON_60]] [[TMP14]], %struct.anon.60* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.60*)* @__omp_outlined__62 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__62 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.60* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.60*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 
@@ -14902,51 +16422,56 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.60* [[__CONTEXT]], %struct.anon.60** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.60*, %struct.anon.60** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_60:%.*]], %struct.anon.60* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_60]], %struct.anon.60* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_61:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -14956,7 +16481,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.61* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -14964,99 +16489,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__63 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.61* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.61*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 
+// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_62:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_62]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.61* [[__CONTEXT]], %struct.anon.61** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.61*, %struct.anon.61** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__64 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_62]], %struct.anon.62* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_62]], %struct.anon.62* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.62* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_62]], %struct.anon.62* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.62* +// CHECK-32-NEXT: store [[STRUCT_ANON_62]] [[TMP14]], %struct.anon.62* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.62*)* @__omp_outlined__64 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__64 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.62* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.62*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15066,46 +16602,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.62* [[__CONTEXT]], %struct.anon.62** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.62*, %struct.anon.62** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_62:%.*]], %struct.anon.62* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_62]], %struct.anon.62* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: 
omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -15119,6 +16659,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_63:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -15128,7 +16669,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.63* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -15136,99 +16677,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__65 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.63* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca 
i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.63*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_64:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_64]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.63* [[__CONTEXT]], %struct.anon.63** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.63*, %struct.anon.63** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* 
[[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__66 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_64]], %struct.anon.64* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_64]], %struct.anon.64* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.64* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_64]], %struct.anon.64* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.64* +// CHECK-32-NEXT: store [[STRUCT_ANON_64]] [[TMP14]], %struct.anon.64* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.64*)* @__omp_outlined__66 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__66 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.64* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, 
align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.64*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15238,46 +16790,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.64* [[__CONTEXT]], %struct.anon.64** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.64*, %struct.anon.64** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_64:%.*]], %struct.anon.64* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_64]], %struct.anon.64* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* 
[[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -15291,6 +16847,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_65:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -15300,7 +16857,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.65* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -15308,99 +16865,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__67 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.65* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.65*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_66:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_66]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.65* [[__CONTEXT]], %struct.anon.65** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.65*, %struct.anon.65** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__68 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_66]], %struct.anon.66* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_66]], %struct.anon.66* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.66* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_66]], %struct.anon.66* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.66* +// CHECK-32-NEXT: store [[STRUCT_ANON_66]] [[TMP14]], %struct.anon.66* [[TMP15]], align 4 +// CHECK-32-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.66*)* @__omp_outlined__68 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define 
{{[^@]+}}@__omp_outlined__68 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.66* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.66*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15410,46 +16978,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.66* [[__CONTEXT]], %struct.anon.66** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.66*, %struct.anon.66** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_66:%.*]], %struct.anon.66* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_66]], %struct.anon.66* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void 
@__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -15463,6 +17035,7 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155 // CHECK-32-SAME: () #[[ATTR0]] { // CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_67:%.*]], align 1 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -15472,7 +17045,7 @@ // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.67* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -15480,99 +17053,110 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__69 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.67* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.67*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_68:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_68]], align 8 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.67* [[__CONTEXT]], %struct.anon.67** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.67*, %struct.anon.67** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 -// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// 
CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__70 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_68]], %struct.anon.68* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_68]], %struct.anon.68* 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.68* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_68]], %struct.anon.68* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.68* +// CHECK-32-NEXT: store [[STRUCT_ANON_68]] [[TMP14]], %struct.anon.68* [[TMP15]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.68*)* @__omp_outlined__70 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32: cond.true5: // CHECK-32-NEXT: br label [[COND_END7:%.*]] // CHECK-32: cond.false6: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END7]] // CHECK-32: cond.end7: -// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-NEXT: 
[[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__70 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.68* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.68*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15582,46 +17166,50 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.68* [[__CONTEXT]], %struct.anon.68** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.68*, %struct.anon.68** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_68:%.*]], %struct.anon.68* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_68]], %struct.anon.68* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// 
CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP255]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -15636,7 +17224,8 @@ // CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_69:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_69]], align 8 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -15644,11 +17233,17 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__71 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.69* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_69]], %struct.anon.69* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.69* +// CHECK-32-NEXT: store [[STRUCT_ANON_69]] [[TMP5]], %struct.anon.69* [[TMP6]], align 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK-32-NEXT: [[TMP8:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.69*)* @__omp_outlined__71 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -15656,10 +17251,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__71 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.69* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.69*, align 4 // 
CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15669,80 +17265,89 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.69* [[__CONTEXT]], %struct.anon.69** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.69*, %struct.anon.69** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_70:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_70]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: 
[[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__72 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.70* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_70]], %struct.anon.70* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.70* +// CHECK-32-NEXT: store [[STRUCT_ANON_70]] [[TMP5]], %struct.anon.70* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.70*)* @__omp_outlined__72 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -15750,10 +17355,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__72 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.70* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.70*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15763,63 +17369,72 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.70* [[__CONTEXT]], %struct.anon.70** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.70*, %struct.anon.70** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_71:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_71]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* 
@[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__73 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.71* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_71]], %struct.anon.71* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.71* +// CHECK-32-NEXT: store [[STRUCT_ANON_71]] [[TMP5]], %struct.anon.71* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.71*)* @__omp_outlined__73 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -15827,10 +17442,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__73 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.71* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.71*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15840,80 +17456,89 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.71* [[__CONTEXT]], %struct.anon.71** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.71*, %struct.anon.71** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], i32* 
[[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_72:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_72]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__74 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.72* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_72]], %struct.anon.72* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.72* +// CHECK-32-NEXT: store [[STRUCT_ANON_72]] [[TMP5]], %struct.anon.72* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.72*)* @__omp_outlined__74 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: 
worker.exit: @@ -15921,10 +17546,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__74 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.72* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.72*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15934,38 +17560,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.72* [[__CONTEXT]], %struct.anon.72** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.72*, %struct.anon.72** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -15979,14 +17607,21 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_73:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_73]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__75 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.73* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_73]], %struct.anon.73* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.73* +// CHECK-32-NEXT: store [[STRUCT_ANON_73]] [[TMP5]], %struct.anon.73* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.73*)* @__omp_outlined__75 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -15994,10 +17629,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__75 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.73* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.73*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16007,38 +17643,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.73* [[__CONTEXT]], %struct.anon.73** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.73*, %struct.anon.73** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 
[[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -16052,14 +17690,21 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_74:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_74]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__76 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.74* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_74]], %struct.anon.74* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.74* +// CHECK-32-NEXT: store [[STRUCT_ANON_74]] [[TMP5]], %struct.anon.74* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.74*)* @__omp_outlined__76 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void 
@__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16067,10 +17712,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__76 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.74* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.74*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16080,38 +17726,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.74* [[__CONTEXT]], %struct.anon.74** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.74*, %struct.anon.74** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP264]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -16125,14 +17773,21 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_75:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_75]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__77 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.75* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_75]], %struct.anon.75* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.75* +// CHECK-32-NEXT: store [[STRUCT_ANON_75]] [[TMP5]], %struct.anon.75* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.75*)* 
@__omp_outlined__77 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16140,10 +17795,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__77 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.75* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.75*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16153,38 +17809,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.75* [[__CONTEXT]], %struct.anon.75** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.75*, %struct.anon.75** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] -// 
CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -16199,7 +17857,8 @@ // CHECK-32-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_76:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_76]], align 8 // CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -16207,11 +17866,17 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__78 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.76* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load 
[[STRUCT_ANON_76]], %struct.anon.76* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.76* +// CHECK-32-NEXT: store [[STRUCT_ANON_76]] [[TMP5]], %struct.anon.76* [[TMP6]], align 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK-32-NEXT: [[TMP8:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.76*)* @__omp_outlined__78 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16219,10 +17884,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__78 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.76* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.76*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16232,88 +17898,97 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.76* [[__CONTEXT]], %struct.anon.76** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.76*, %struct.anon.76** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label 
[[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 
-// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_77:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_77]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__79 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.77* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_77]], %struct.anon.77* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.77* +// 
CHECK-32-NEXT: store [[STRUCT_ANON_77]] [[TMP5]], %struct.anon.77* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.77*)* @__omp_outlined__79 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16321,10 +17996,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__79 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.77* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.77*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16334,71 +18010,80 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.77* [[__CONTEXT]], %struct.anon.77** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.77*, %struct.anon.77** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // 
CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 // CHECK-32-SAME: () 
#[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_78:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_78]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__80 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.78* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_78]], %struct.anon.78* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.78* +// CHECK-32-NEXT: store [[STRUCT_ANON_78]] [[TMP5]], %struct.anon.78* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.78*)* @__omp_outlined__80 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16406,10 +18091,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__80 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.78* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.78*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16419,88 +18105,97 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.78* [[__CONTEXT]], %struct.anon.78** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.78*, %struct.anon.78** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // 
CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_79:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_79]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* 
@[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__81 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.79* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_79]], %struct.anon.79* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.79* +// CHECK-32-NEXT: store [[STRUCT_ANON_79]] [[TMP5]], %struct.anon.79* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.79*)* @__omp_outlined__81 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16508,10 +18203,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__81 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.79* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.79*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16521,38 +18217,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.79* [[__CONTEXT]], %struct.anon.79** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.79*, %struct.anon.79** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void 
@__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -16560,28 +18258,35 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 
4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_80:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_80]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__82 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.80* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_80]], %struct.anon.80* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.80* +// CHECK-32-NEXT: store [[STRUCT_ANON_80]] [[TMP5]], %struct.anon.80* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.80*)* @__omp_outlined__82 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16589,10 +18294,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__82 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.80* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.80*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16602,38 +18308,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 
4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.80* [[__CONTEXT]], %struct.anon.80** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.80*, %struct.anon.80** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: 
omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -16641,28 +18349,35 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_81:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_81]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__83 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.81* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_81]], %struct.anon.81* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.81* +// CHECK-32-NEXT: store [[STRUCT_ANON_81]] [[TMP5]], %struct.anon.81* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.81*)* @__omp_outlined__83 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], 
i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16670,10 +18385,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__83 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.81* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.81*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16683,38 +18399,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.81* [[__CONTEXT]], %struct.anon.81** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.81*, %struct.anon.81** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -16722,28 +18440,35 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_82:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_82]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 
-1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__84 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.82* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_82]], %struct.anon.82* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.82* +// CHECK-32-NEXT: store [[STRUCT_ANON_82]] [[TMP5]], %struct.anon.82* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.82*)* @__omp_outlined__84 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16751,10 +18476,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__84 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.82* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.82*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16764,38 +18490,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.82* [[__CONTEXT]], %struct.anon.82** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.82*, %struct.anon.82** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -16803,28 +18531,35 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 
// CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_83:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_83]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__85 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.83* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_83]], %struct.anon.83* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.83* +// CHECK-32-NEXT: store [[STRUCT_ANON_83]] [[TMP5]], %struct.anon.83* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.83*)* @__omp_outlined__85 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16832,10 +18567,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__85 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.83* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.83*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16845,68 +18581,77 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.83* [[__CONTEXT]], %struct.anon.83** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.83*, %struct.anon.83** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store 
i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 65, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-NEXT: call void 
@__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-32-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP291]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_84:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_84]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__86 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.84* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_84]], %struct.anon.84* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.84* +// CHECK-32-NEXT: store [[STRUCT_ANON_84]] [[TMP5]], %struct.anon.84* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.84*)* @__omp_outlined__86 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16914,10 +18659,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__86 -// CHECK-32-SAME: (i32* noalias noundef 
[[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.84* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.84*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16927,71 +18673,80 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.84* [[__CONTEXT]], %struct.anon.84** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.84*, %struct.anon.84** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP294]] -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_85:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_85]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call 
i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__87 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.85* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_85]], %struct.anon.85* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.85* +// CHECK-32-NEXT: store [[STRUCT_ANON_85]] [[TMP5]], %struct.anon.85* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.85*)* @__omp_outlined__87 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -16999,10 +18754,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__87 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.85* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.85*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17012,88 +18768,97 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.85* [[__CONTEXT]], %struct.anon.85** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.85*, %struct.anon.85** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: 
br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP11]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_86:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_86]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__88 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: 
[[TMP2:%.*]] = bitcast %struct.anon.86* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_86]], %struct.anon.86* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.86* +// CHECK-32-NEXT: store [[STRUCT_ANON_86]] [[TMP5]], %struct.anon.86* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.86*)* @__omp_outlined__88 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17101,10 +18866,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__88 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.86* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.86*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17114,38 +18880,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.86* [[__CONTEXT]], %struct.anon.86** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.86*, %struct.anon.86** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* 
[[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17153,28 +18921,35 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 // 
CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_87:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_87]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__89 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.87* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_87]], %struct.anon.87* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.87* +// CHECK-32-NEXT: store [[STRUCT_ANON_87]] [[TMP5]], %struct.anon.87* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.87*)* @__omp_outlined__89 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17182,10 +18957,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__89 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.87* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.87*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17195,38 +18971,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.87* [[__CONTEXT]], %struct.anon.87** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.87*, %struct.anon.87** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17234,28 +19012,35 @@ // CHECK-32: omp.dispatch.inc: // 
CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_88:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_88]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__90 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.88* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_88]], %struct.anon.88* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.88* +// CHECK-32-NEXT: store [[STRUCT_ANON_88]] [[TMP5]], %struct.anon.88* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.88*)* @__omp_outlined__90 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17263,10 +19048,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__90 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.88* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.88*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17276,38 +19062,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.88* [[__CONTEXT]], %struct.anon.88** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.88*, %struct.anon.88** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17315,28 +19103,35 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_89:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_89]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__91 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.89* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_89]], %struct.anon.89* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.89* +// CHECK-32-NEXT: store [[STRUCT_ANON_89]] [[TMP5]], 
%struct.anon.89* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.89*)* @__omp_outlined__91 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17344,10 +19139,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__91 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.89* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.89*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17357,38 +19153,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.89* [[__CONTEXT]], %struct.anon.89** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.89*, %struct.anon.89** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // 
CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17396,28 +19194,35 @@ // CHECK-32: omp.dispatch.inc: // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_90:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_90]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: 
br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__92 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.90* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_90]], %struct.anon.90* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.90* +// CHECK-32-NEXT: store [[STRUCT_ANON_90]] [[TMP5]], %struct.anon.90* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.90*)* @__omp_outlined__92 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17425,10 +19230,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__92 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.90* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.90*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17438,80 +19244,89 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.90* [[__CONTEXT]], %struct.anon.90** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.90*, %struct.anon.90** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // 
CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_91:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_91]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__93 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.91* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_91]], %struct.anon.91* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.91* +// CHECK-32-NEXT: store [[STRUCT_ANON_91]] [[TMP5]], %struct.anon.91* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.91*)* @__omp_outlined__93 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17519,10 +19334,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__93 -// 
CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.91* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.91*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17532,63 +19348,72 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.91* [[__CONTEXT]], %struct.anon.91** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.91*, %struct.anon.91** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32: omp.loop.exit: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_92:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_92]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__94 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.92* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_92]], %struct.anon.92* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.92* +// CHECK-32-NEXT: store [[STRUCT_ANON_92]] [[TMP5]], %struct.anon.92* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.92*)* @__omp_outlined__94 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void 
@__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17596,10 +19421,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__94 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.92* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.92*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17609,80 +19435,89 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.92* [[__CONTEXT]], %struct.anon.92** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.92*, %struct.anon.92** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-NEXT: ret void // // // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_93:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_93]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__95 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.93* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_93]], %struct.anon.93* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.93* +// CHECK-32-NEXT: store [[STRUCT_ANON_93]] [[TMP5]], %struct.anon.93* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.93*)* @__omp_outlined__95 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17690,10 +19525,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__95 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.93* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.93*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17703,38 +19539,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.93* [[__CONTEXT]], %struct.anon.93** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.93*, %struct.anon.93** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: 
store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP312]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17748,14 +19586,21 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_94:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_94]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__96 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.94* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_94]], %struct.anon.94* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.94* +// CHECK-32-NEXT: store [[STRUCT_ANON_94]] [[TMP5]], %struct.anon.94* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.94*)* @__omp_outlined__96 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17763,10 +19608,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__96 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.94* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.94*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17776,38 +19622,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.94* [[__CONTEXT]], %struct.anon.94** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.94*, %struct.anon.94** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // 
CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // 
CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17821,14 +19669,21 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_95:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_95]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__97 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.95* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_95]], %struct.anon.95* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.95* +// CHECK-32-NEXT: store [[STRUCT_ANON_95]] [[TMP5]], %struct.anon.95* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.95*)* @__omp_outlined__97 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17836,10 +19691,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__97 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.95* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.95*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17849,38 +19705,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.95* [[__CONTEXT]], %struct.anon.95** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.95*, %struct.anon.95** [[__CONTEXT_ADDR]], align 4 
// CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// 
CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17894,14 +19752,21 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 // CHECK-32-SAME: () #[[ATTR8]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_96:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_96]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__98 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.96* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_96]], %struct.anon.96* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.96* +// CHECK-32-NEXT: store [[STRUCT_ANON_96]] [[TMP5]], %struct.anon.96* [[TMP6]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.96*)* @__omp_outlined__98 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -17909,10 +19774,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__98 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.96* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.96*, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -17922,38 +19788,40 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store %struct.anon.96* [[__CONTEXT]], %struct.anon.96** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load 
%struct.anon.96*, %struct.anon.96** [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] // CHECK-32: omp.inner.for.end: @@ -17968,7 +19836,7 @@ // CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -17978,15 +19846,14 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-EX-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[CONV1]], align 1 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[TMP2]], align 1 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -17994,175 +19861,191 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, 
align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED16:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS19:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_16:%.*]] = alloca [[STRUCT_ANON_0]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG19:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[TMP1]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-32-EX-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32-EX: omp_if.then: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP9]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK-32-EX-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* -// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL4]] to i32 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP19]], i32 3), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[TMP12]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK-32-EX-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL4]], i8* [[TMP16]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP18]], i8* [[TMP19]]), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP20]] to %struct.anon.0* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_0]] [[TMP21]], %struct.anon.0* [[TMP22]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = zext i1 [[TOBOOL5]] to i32 +// CHECK-32-EX-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 [[TMP24]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP20]]), !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP19]], i32 12), !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP26]], 9 -// CHECK-32-EX-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] -// CHECK-32-EX: cond.true8: -// CHECK-32-EX-NEXT: br label [[COND_END10:%.*]] -// CHECK-32-EX: cond.false9: -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: br label [[COND_END10]] -// CHECK-32-EX: cond.end10: -// CHECK-32-EX-NEXT: [[COND11:%.*]] = phi i32 [ 9, [[COND_TRUE8]] ], [ [[TMP27]], [[COND_FALSE9]] ] -// CHECK-32-EX-NEXT: store i32 [[COND11]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] -// CHECK-32-EX-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], 
align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK-32-EX-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP31]], 9 +// CHECK-32-EX-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK-32-EX: cond.true9: +// CHECK-32-EX-NEXT: br label [[COND_END11:%.*]] +// CHECK-32-EX: cond.false10: +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: br label [[COND_END11]] +// CHECK-32-EX: cond.end11: +// CHECK-32-EX-NEXT: [[COND12:%.*]] = phi i32 [ 9, [[COND_TRUE9]] ], [ [[TMP32]], [[COND_FALSE10]] ] +// CHECK-32-EX-NEXT: store i32 [[COND12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP130]] +// CHECK-32-EX-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP130]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP131:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32-EX: omp_if.else: -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] -// CHECK-32-EX: omp.inner.for.cond12: -// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP13:%.*]] = icmp slt i32 [[TMP29]], 10 -// CHECK-32-EX-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END30:%.*]] -// CHECK-32-EX: omp.inner.for.body14: -// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP32]] to i1 -// CHECK-32-EX-NEXT: [[CONV17:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED16]] to i8* -// CHECK-32-EX-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL15]] to i8 -// CHECK-32-EX-NEXT: store i8 [[FROMBOOL18]], i8* [[CONV17]], align 1 -// CHECK-32-EX-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED16]], align 4 -// CHECK-32-EX-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP35:%.*]] = inttoptr i32 [[TMP30]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK-32-EX-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP37:%.*]] = inttoptr i32 [[TMP31]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK-32-EX-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS19]], i32 0, i32 2 -// CHECK-32-EX-NEXT: [[TMP39:%.*]] = inttoptr i32 [[TMP33]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK-32-EX-NEXT: [[TMP40:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP40]] to i1 -// CHECK-32-EX-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL20]] to i32 -// CHECK-32-EX-NEXT: [[TMP42:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS19]] to i8** -// CHECK-32-EX-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP42]], i32 3) +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] +// CHECK-32-EX: omp.inner.for.cond13: +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP14:%.*]] = icmp slt i32 [[TMP34]], 10 +// CHECK-32-EX-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END30:%.*]] +// CHECK-32-EX: omp.inner.for.body15: +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP35]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP36]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP38]], i32* [[TMP37]], align 4 +// CHECK-32-EX-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP40]], i32* [[TMP39]], align 4 +// CHECK-32-EX-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP42:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL17:%.*]] = trunc i8 [[TMP42]] to i1 +// CHECK-32-EX-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TOBOOL17]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL18]], i8* [[TMP41]], align 4 +// CHECK-32-EX-NEXT: [[TMP43:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG19]] to i8* +// CHECK-32-EX-NEXT: [[TMP44:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12) +// CHECK-32-EX-NEXT: [[TMP45:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP43]], i8* [[TMP44]]) +// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_16]], align 1 +// CHECK-32-EX-NEXT: [[TMP47:%.*]] = bitcast i8* [[TMP45]] to %struct.anon.0* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_0]] [[TMP46]], %struct.anon.0* [[TMP47]], align 1 +// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL20:%.*]] = trunc i8 [[TMP48]] to i1 +// CHECK-32-EX-NEXT: [[TMP49:%.*]] = zext i1 [[TOBOOL20]] to i32 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 [[TMP49]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__2 to i8*), i8* null, i8* [[TMP45]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP44]], i32 12) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] // CHECK-32-EX: omp.inner.for.inc21: -// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK-32-EX-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: 
[[ADD22:%.*]] = add nsw i32 [[TMP50]], [[TMP51]] // CHECK-32-EX-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK-32-EX-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP52]], [[TMP53]] // CHECK-32-EX-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK-32-EX-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP54]], [[TMP55]] // CHECK-32-EX-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP49]], 9 +// CHECK-32-EX-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP56]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP25]], label [[COND_TRUE26:%.*]], label [[COND_FALSE27:%.*]] // CHECK-32-EX: cond.true26: // CHECK-32-EX-NEXT: br label [[COND_END28:%.*]] // CHECK-32-EX: cond.false27: -// CHECK-32-EX-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP57:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END28]] // CHECK-32-EX: cond.end28: -// CHECK-32-EX-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP50]], [[COND_FALSE27]] ] +// CHECK-32-EX-NEXT: [[COND29:%.*]] = phi i32 [ 9, [[COND_TRUE26]] ], [ [[TMP57]], [[COND_FALSE27]] ] // CHECK-32-EX-NEXT: store i32 [[COND29]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP51]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP134:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP58]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP134:![0-9]+]] // CHECK-32-EX: omp.inner.for.end30: // CHECK-32-EX-NEXT: br label [[OMP_IF_END]] // CHECK-32-EX: omp_if.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP53]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK-32-EX-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP60:%.*]] = icmp ne i32 [[TMP59]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP60]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18171,13 +20054,14 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -18187,86 +20071,95 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32-EX: omp_if.then: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group [[ACC_GRP136]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP136]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP136]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP137:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32-EX: omp_if.else: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] -// CHECK-32-EX: omp.inner.for.cond2: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] -// CHECK-32-EX: omp.inner.for.body4: -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 -// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] -// CHECK-32-EX: omp.body.continue7: -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] -// CHECK-32-EX: omp.inner.for.inc8: -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP139:![0-9]+]] -// CHECK-32-EX: omp.inner.for.end10: +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: 
store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] +// CHECK-32-EX: omp.inner.for.cond3: +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP21]], [[TMP22]] +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END11:%.*]] +// CHECK-32-EX: omp.inner.for.body5: +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP23]], 1 +// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK-32-EX-NEXT: store i32 [[ADD7]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE8:%.*]] +// CHECK-32-EX: omp.body.continue8: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC9:%.*]] +// CHECK-32-EX: omp.inner.for.inc9: +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK-32-EX-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP139:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end11: // CHECK-32-EX-NEXT: br label [[OMP_IF_END]] // CHECK-32-EX: omp_if.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18275,13 +20168,14 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// 
CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -18291,86 +20185,95 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* +// CHECK-32-EX-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i8, i8* [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK-32-EX-NEXT: 
[[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK-32-EX-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK-32-EX: omp_if.then: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP140]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP140]] // CHECK-32-EX-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP141:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]] // CHECK-32-EX: omp_if.else: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]] -// CHECK-32-EX: omp.inner.for.cond2: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]] -// CHECK-32-EX: omp.inner.for.body4: -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1 -// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK-32-EX-NEXT: store i32 [[ADD6]], i32* [[I]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]] -// CHECK-32-EX: omp.body.continue7: -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]] -// CHECK-32-EX: omp.inner.for.inc8: -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK-32-EX-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP143:![0-9]+]] -// CHECK-32-EX: omp.inner.for.end10: +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP19]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] +// CHECK-32-EX: omp.inner.for.cond3: +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP21]], [[TMP22]] +// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END11:%.*]] +// CHECK-32-EX: omp.inner.for.body5: +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP23]], 1 +// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] +// CHECK-32-EX-NEXT: store i32 [[ADD7]], i32* [[I]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE8:%.*]] +// CHECK-32-EX: omp.body.continue8: +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC9:%.*]] +// CHECK-32-EX: omp.inner.for.inc9: +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK-32-EX-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP143:![0-9]+]] +// CHECK-32-EX: omp.inner.for.end11: // CHECK-32-EX-NEXT: br label [[OMP_IF_END]] // CHECK-32-EX: omp_if.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP20]]) -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP27]]) +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18381,6 +20284,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -18390,7 +20294,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -18398,92 +20302,106 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 // 
CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__4 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.2* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], align 4, 
!llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.2* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_2]] [[TMP16]], %struct.anon.2* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.2*)* @__omp_outlined__4 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP144]] +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP144]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18492,12 +20410,13 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -18507,58 +20426,64 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], 
align 4 +// CHECK-32-EX-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP147]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18569,6 +20494,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -18578,7 +20504,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -18586,92 +20512,106 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.3* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.3*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.3* [[__CONTEXT]], %struct.anon.3** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// 
CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__6 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.4* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_4]], %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.4* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_4]] [[TMP16]], %struct.anon.4* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.4*)* @__omp_outlined__6 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP150]] +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP150]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18680,12 +20620,13 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.4* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.4*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -18695,48 +20636,54 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.4* [[__CONTEXT]], %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: 
store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP153]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP153]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: 
[[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18747,6 +20694,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -18756,7 +20704,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__7(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -18764,92 +20712,106 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.5* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.5*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.5* [[__CONTEXT]], %struct.anon.5** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.5*, %struct.anon.5** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* 
[[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 
[[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__8 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.6* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_6]], %struct.anon.6* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.6* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_6]] [[TMP16]], %struct.anon.6* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.6*)* @__omp_outlined__8 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: 
[[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP156]] +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP156]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18858,12 +20820,13 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.6* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.6*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -18873,46 +20836,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.6* [[__CONTEXT]], %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = 
load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP159]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -18920,9 +20889,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -18933,6 +20902,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -18942,7 +20912,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.7* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -18950,92 +20920,106 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.7* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.7*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.7* [[__CONTEXT]], %struct.anon.7** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.7*, %struct.anon.7** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// 
CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.8* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_8]], %struct.anon.8* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: 
[[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.8* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_8]] [[TMP16]], %struct.anon.8* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.8*)* @__omp_outlined__10 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] // 
CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP162]] +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP162]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -19044,12 +21028,13 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.8* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.8*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -19059,46 +21044,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store 
%struct.anon.8* [[__CONTEXT]], %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], %struct.anon.8* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], %struct.anon.8* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// 
CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP165]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -19106,9 +21097,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -19119,6 +21110,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19128,7 +21120,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.9* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -19136,92 +21128,106 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.9* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.9*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_10]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.9* [[__CONTEXT]], %struct.anon.9** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.9*, %struct.anon.9** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: 
call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, 
!llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.10* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_10]], %struct.anon.10* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.10* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_10]] [[TMP16]], %struct.anon.10* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.10*)* @__omp_outlined__12 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP168]] +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP168]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -19230,12 +21236,13 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__12 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef 
[[DOTBOUND_TID_:%.*]], %struct.anon.10* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.10*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -19245,46 +21252,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.10* [[__CONTEXT]], %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], %struct.anon.10* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], %struct.anon.10* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// 
CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP171]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -19292,9 +21305,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // 
CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -19305,6 +21318,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19314,7 +21328,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__13(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.11* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -19322,92 +21336,106 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__13 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.11* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.11*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_12]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// 
CHECK-32-EX-NEXT: store %struct.anon.11* [[__CONTEXT]], %struct.anon.11** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.11*, %struct.anon.11** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x 
i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__14 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.12* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_12]], %struct.anon.12* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.12* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_12]] [[TMP16]], %struct.anon.12* [[TMP17]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.12*)* @__omp_outlined__14 to i8*), i8* null, i8* [[TMP15]]), !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8), !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP174]] +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP174]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -19416,12 +21444,13 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__14 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.12* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.12*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -19431,46 +21460,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.12* [[__CONTEXT]], %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], %struct.anon.12* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], %struct.anon.12* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP177]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP177]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -19478,9 +21513,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -19492,7 +21527,7 @@ // CHECK-32-EX-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 @@ -19501,12 +21536,12 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__15(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__15(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.13* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -19514,23 +21549,30 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__15 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.13* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.13*, align 4 +// CHECK-32-EX-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_14]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.13* [[__CONTEXT]], %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.13*, %struct.anon.13** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], %struct.anon.13* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[A]], align 4 // CHECK-32-EX-NEXT: [[A1:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK-32-EX-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -19538,82 +21580,87 @@ // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// 
CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP15]], i8** [[TMP14]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, 
i32*, i32, i32, i32)* @__omp_outlined__16 to i8*), i8* null, i8** [[TMP16]], i32 3) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP14]], i32* [[TMP13]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP16]], i32* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast %struct.anon.14* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 12) +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP17]], i8* [[TMP18]]) +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_14]], %struct.anon.14* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP19]] to %struct.anon.14* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_14]] [[TMP20]], %struct.anon.14* [[TMP21]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.14*)* @__omp_outlined__16 to i8*), i8* null, i8* [[TMP19]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP18]], i32 12) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// 
CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP23]], 9 +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP28]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK-32-EX: cond.true6: // CHECK-32-EX-NEXT: br label [[COND_END8:%.*]] // CHECK-32-EX: cond.false7: -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END8]] // CHECK-32-EX: cond.end8: -// CHECK-32-EX-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP24]], [[COND_FALSE7]] ] +// CHECK-32-EX-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP29]], [[COND_FALSE7]] ] // CHECK-32-EX-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32-EX: .omp.lastprivate.then: -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP28]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP33]], i32* [[A]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32-EX: .omp.lastprivate.done: // CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) @@ -19621,13 +21668,14 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__16 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.14* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.14*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -19638,54 +21686,62 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.14* [[__CONTEXT]], %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], %struct.anon.14* [[TMP0]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[A]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 
1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP15]], i32* [[A1]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP19]], 
label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32-EX: .omp.lastprivate.then: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP13]], i32* [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP20]], i32* [[A]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32-EX: .omp.lastprivate.done: // CHECK-32-EX-NEXT: ret void @@ -19694,6 +21750,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19703,7 +21760,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__17(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.15* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -19711,99 +21768,114 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__17 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.15* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.15*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_16]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.15* [[__CONTEXT]], %struct.anon.15** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load 
%struct.anon.15*, %struct.anon.15** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// 
CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__18 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.16* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_16]], %struct.anon.16* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.16* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_16]] [[TMP16]], %struct.anon.16* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.16*)* @__omp_outlined__18 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__18 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.16* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.16*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -19813,61 +21885,68 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 
[[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.16* [[__CONTEXT]], %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], %struct.anon.16* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], %struct.anon.16* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19877,7 +21956,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__19(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.17* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -19885,99 +21964,114 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__19 -// CHECK-32-EX-SAME: (i32* noalias 
noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.17* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.17*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_18]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.17* [[__CONTEXT]], %struct.anon.17** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__20 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.18* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_18]], %struct.anon.18* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to 
%struct.anon.18* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_18]] [[TMP16]], %struct.anon.18* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.18*)* @__omp_outlined__20 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: 
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__20 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.18* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.18*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -19987,51 +22081,58 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.18* [[__CONTEXT]], %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], %struct.anon.18* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: 
[[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20041,7 +22142,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__21(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.19* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -20049,99 +22150,114 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__21 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.19* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.19*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_20]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.19* [[__CONTEXT]], %struct.anon.19** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.19*, %struct.anon.19** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, 
i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__22 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.20* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_20]], %struct.anon.20* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.20* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_20]] [[TMP16]], %struct.anon.20* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.20*)* @__omp_outlined__22 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__22 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.20* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.20*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -20151,46 +22267,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.20* [[__CONTEXT]], %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.20*, %struct.anon.20** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], %struct.anon.20* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], %struct.anon.20* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP180]] +// 
CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP180]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -20204,6 +22326,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20213,7 +22336,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__23(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.21* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -20221,99 +22344,114 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__23 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.21* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.21*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // 
CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_22]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.21* [[__CONTEXT]], %struct.anon.21** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// 
CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__24 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.22* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_22]], %struct.anon.22* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.22* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_22]] [[TMP16]], %struct.anon.22* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.22*)* @__omp_outlined__24 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__24 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.22* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca 
i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.22*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -20323,46 +22461,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.22* [[__CONTEXT]], %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.22*, %struct.anon.22** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], %struct.anon.22* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], %struct.anon.22* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: 
call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP183]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -20376,6 +22520,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 1 // 
CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20385,7 +22530,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__25(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.23* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -20393,99 +22538,114 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__25 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.23* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.23*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_24]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.23* [[__CONTEXT]], %struct.anon.23** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* 
[[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__26 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// 
CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.24* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_24]], %struct.anon.24* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.24* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_24]] [[TMP16]], %struct.anon.24* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.24*)* @__omp_outlined__26 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // 
CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__26 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.24* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.24*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -20495,46 +22655,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.24* [[__CONTEXT]], %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.24*, %struct.anon.24** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], %struct.anon.24* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], %struct.anon.24* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]] +// CHECK-32-EX-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP186]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -20548,6 +22714,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20557,7 +22724,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__27(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.25* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -20565,99 +22732,114 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__27 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.25* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.25*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = 
alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_26]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.25* [[__CONTEXT]], %struct.anon.25** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.25*, %struct.anon.25** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 
[[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__28 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast %struct.anon.26* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP13]], i8* [[TMP14]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_26]], %struct.anon.26* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP15]] to %struct.anon.26* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_26]] [[TMP16]], %struct.anon.26* [[TMP17]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.26*)* @__omp_outlined__28 to i8*), i8* null, i8* [[TMP15]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP14]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__28 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], 
%struct.anon.26* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.26*, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -20667,46 +22849,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.26* [[__CONTEXT]], %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.26*, %struct.anon.26** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], %struct.anon.26* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], %struct.anon.26* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: 
[[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP189]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -20720,6 +22908,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58 // 
CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20729,7 +22918,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__29(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.27* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -20737,10 +22926,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__29 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.27* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.27*, align 4 // CHECK-32-EX-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -20748,82 +22938,93 @@ // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_28]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.27* [[__CONTEXT]], %struct.anon.27** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.27*, %struct.anon.27** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void 
@__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* 
@__omp_outlined__30 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.28* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_28]], %struct.anon.28* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.28* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_28]] [[TMP14]], %struct.anon.28* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.28*)* @__omp_outlined__30 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP192]] +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP192]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -20832,12 +23033,11 @@ // // // CHECK-32-EX-LABEL: 
define {{[^@]+}}@__omp_outlined__30 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.28* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.28*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -20847,48 +23047,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.28* [[__CONTEXT]], %struct.anon.28** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.28*, %struct.anon.28** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], %struct.anon.28* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], %struct.anon.28* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// 
CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP195]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP195]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -20899,6 +23103,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_29:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20908,7 +23113,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__31(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.29* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -20916,10 +23121,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__31 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.29* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.29*, align 4 // CHECK-32-EX-NEXT: [[B:%.*]] = alloca [3 x i32], align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -20927,84 +23133,95 @@ // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_30:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_30]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = bitcast [3 x i32]* [[B]] to i8* -// CHECK-32-EX-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP0]], i8* align 4 bitcast ([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) +// CHECK-32-EX-NEXT: store %struct.anon.29* [[__CONTEXT]], %struct.anon.29** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.29*, %struct.anon.29** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = bitcast [3 x i32]* [[B]] to i8* +// CHECK-32-EX-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 bitcast 
([3 x i32]* @"__const..b" to i8*), i32 12, i1 false) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP10]], 
i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__32 to i8*), i8* null, i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_30]], %struct.anon.30* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_30]], %struct.anon.30* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.30* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP12]], i8* [[TMP13]]), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load [[STRUCT_ANON_30]], %struct.anon.30* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP14]] to %struct.anon.30* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_30]] [[TMP15]], %struct.anon.30* [[TMP16]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.30*)* @__omp_outlined__32 to i8*), i8* null, i8* [[TMP14]]), !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP13]], i32 8), !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP23]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP24]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] -// CHECK-32-EX-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP198]] +// CHECK-32-EX-NEXT: store i32 [[TMP25]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP198]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP24]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21013,12 +23230,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__32 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.30* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.30*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -21028,58 +23244,62 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.30* [[__CONTEXT]], %struct.anon.30** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.30*, %struct.anon.30** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_30:%.*]], %struct.anon.30* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_30]], %struct.anon.30* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] -// 
CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP201]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21090,6 +23310,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -21099,7 +23320,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__33(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.31* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -21107,10 +23328,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__33 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.31* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.31*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 @@ -21118,9 +23340,16 @@ // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca 
i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_32:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_32]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.31* [[__CONTEXT]], %struct.anon.31** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.31*, %struct.anon.31** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK:%.*]] = bitcast i8* [[DOTCAPTURED_OMP_PREVIOUS_LB]] to i32* +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK:%.*]] = bitcast i8* [[DOTCAPTURED_OMP_PREVIOUS_UB]] to i32* // CHECK-32-EX-NEXT: [[C:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) // CHECK-32-EX-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C]] to i32** // CHECK-32-EX-NEXT: [[B:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) @@ -21129,53 +23358,59 @@ // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8** [[TMP13]], i32 2), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_32]], %struct.anon.32* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB_ON_STACK]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_32]], %struct.anon.32* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB_ON_STACK]], i32** [[TMP11]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast %struct.anon.32* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP12]], i8* [[TMP13]]), !llvm.access.group [[ACC_GRP204]] +// 
CHECK-32-EX-NEXT: [[TMP15:%.*]] = load [[STRUCT_ANON_32]], %struct.anon.32* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP14]] to %struct.anon.32* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_32]] [[TMP15]], %struct.anon.32* [[TMP16]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.32*)* @__omp_outlined__34 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__34_wrapper to i8*), i8* [[TMP14]]), !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP13]], i32 8), !llvm.access.group [[ACC_GRP204]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP204]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP204]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21183,16 +23418,17 @@ // CHECK-32-EX-NEXT: store i32* [[B_ON_STACK]], i32** [[C_ON_STACK]], align 4 // CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[B]], i32 4) // CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[C]], i32 4) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32 4) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32 4) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__34 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.32* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // 
CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.32*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -21202,48 +23438,52 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.32* [[__CONTEXT]], %struct.anon.32** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.32*, %struct.anon.32** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_32:%.*]], %struct.anon.32* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_32]], %struct.anon.32* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: 
omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP207]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP207]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21257,25 +23497,22 @@ // CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK-32-EX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8*, align 4 // CHECK-32-EX-NEXT: store i16 
[[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 1 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32 [[TMP5]], i32 [[TMP8]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__kmpc_get_shared_variables_aggregate(i8** [[GLOBAL_ARGS]]) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8*, i8** [[GLOBAL_ARGS]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.anon.32** +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast %struct.anon.32** [[TMP3]] to %struct.anon.32* +// CHECK-32-EX-NEXT: call void @__omp_outlined__34(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], %struct.anon.32* [[TMP4]]) #[[ATTR2]] // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_33:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21285,7 +23522,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__35(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.33* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -21293,92 +23530,104 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__35 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.33* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.33*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 
// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_34]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.33* [[__CONTEXT]], %struct.anon.33** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.33*, %struct.anon.33** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP210:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__36 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], %struct.anon.34* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], %struct.anon.34* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.34* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_34]], %struct.anon.34* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.34* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_34]] [[TMP14]], %struct.anon.34* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.34*)* @__omp_outlined__36 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP210]] +// 
CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 
4, !llvm.access.group [[ACC_GRP210]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP210]] +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP210]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21387,12 +23636,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__36 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.34* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.34*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -21402,46 +23650,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.34* [[__CONTEXT]], %struct.anon.34** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.34*, %struct.anon.34** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_34:%.*]], %struct.anon.34* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], %struct.anon.34* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], 
align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP213]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -21449,9 +23701,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21462,6 +23714,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_35:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21471,7 +23724,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__37(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.35* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -21479,92 +23732,104 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__37 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef 
[[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.35* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.35*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_36:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_36]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.35* [[__CONTEXT]], %struct.anon.35** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.35*, %struct.anon.35** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 
[ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__38 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_36]], %struct.anon.36* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_36]], %struct.anon.36* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.36* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* 
@__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_36]], %struct.anon.36* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.36* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_36]] [[TMP14]], %struct.anon.36* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.36*)* @__omp_outlined__38 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // 
CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP216]] +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP216]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21573,12 +23838,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__38 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.36* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.36*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -21588,46 +23852,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store 
i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.36* [[__CONTEXT]], %struct.anon.36** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.36*, %struct.anon.36** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_36:%.*]], %struct.anon.36* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_36]], %struct.anon.36* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP219]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -21635,9 +23903,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21648,6 +23916,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21657,7 +23926,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* 
[[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__39(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.37* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -21665,92 +23934,104 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__39 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.37* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.37*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_38:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_38]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.37* [[__CONTEXT]], %struct.anon.37** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.37*, %struct.anon.37** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], 
i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__40 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_38]], %struct.anon.38* 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_38]], %struct.anon.38* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.38* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_38]], %struct.anon.38* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.38* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_38]] [[TMP14]], %struct.anon.38* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.38*)* @__omp_outlined__40 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP222]] +// 
CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP222]] +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP222]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21759,12 +24040,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__40 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.38* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// 
CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.38*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -21774,46 +24054,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.38* [[__CONTEXT]], %struct.anon.38** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.38*, %struct.anon.38** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_38:%.*]], %struct.anon.38* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_38]], %struct.anon.38* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 
@__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP225]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -21821,9 +24105,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -21834,6 +24118,7 @@ // CHECK-32-EX-LABEL: 
define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_39:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21843,7 +24128,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__41(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.39* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -21851,92 +24136,104 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__41 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.39* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.39*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_40:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_40]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.39* [[__CONTEXT]], %struct.anon.39** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.39*, %struct.anon.39** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* 
bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__42 to i8*), i8* null, i8** [[TMP12]], i32 2), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_40]], %struct.anon.40* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_40]], %struct.anon.40* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.40* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_40]], %struct.anon.40* [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.40* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_40]] [[TMP14]], %struct.anon.40* [[TMP15]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.40*)* @__omp_outlined__42 to i8*), i8* null, i8* [[TMP13]]), !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8), !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// 
CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP228]] +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP228]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ 
-21945,12 +24242,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__42 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.40* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.40*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -21960,46 +24256,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.40* [[__CONTEXT]], %struct.anon.40** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.40*, %struct.anon.40** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_40:%.*]], %struct.anon.40* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_40]], %struct.anon.40* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// 
CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -22007,9 +24307,9 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // 
CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -22020,6 +24320,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_41:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22029,7 +24330,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__43(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.41* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -22037,99 +24338,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__43 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.41* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.41*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_42:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_42]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// 
CHECK-32-EX-NEXT: store %struct.anon.41* [[__CONTEXT]], %struct.anon.41** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.41*, %struct.anon.41** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** 
[[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__44 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_42]], %struct.anon.42* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_42]], %struct.anon.42* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.42* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_42]], %struct.anon.42* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.42* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_42]] [[TMP14]], %struct.anon.42* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.42*)* @__omp_outlined__44 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 
-// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__44 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.42* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.42*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -22139,51 +24451,56 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// 
CHECK-32-EX-NEXT: store %struct.anon.42* [[__CONTEXT]], %struct.anon.42** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.42*, %struct.anon.42** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_42:%.*]], %struct.anon.42* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_42]], %struct.anon.42* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // 
CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_43:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22193,7 +24510,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__45(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.43* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -22201,99 +24518,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__45 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.43* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.43*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_44:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_44]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.43* [[__CONTEXT]], %struct.anon.43** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.43*, %struct.anon.43** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__46 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_44]], %struct.anon.44* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_44]], %struct.anon.44* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.44* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_44]], %struct.anon.44* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.44* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_44]] [[TMP14]], %struct.anon.44* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.44*)* @__omp_outlined__46 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = 
load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__46 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.44* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.44*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -22303,61 +24631,66 @@ // 
CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.44* [[__CONTEXT]], %struct.anon.44** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.44*, %struct.anon.44** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_44:%.*]], %struct.anon.44* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_44]], %struct.anon.44* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// 
CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_45:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22367,7 +24700,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__47(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.45* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -22375,99 +24708,110 @@ // // // CHECK-32-EX-LABEL: define 
{{[^@]+}}@__omp_outlined__47 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.45* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.45*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_46:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_46]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.45* [[__CONTEXT]], %struct.anon.45** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.45*, %struct.anon.45** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__48 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_46]], %struct.anon.46* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_46]], %struct.anon.46* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.46* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_46]], %struct.anon.46* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.46* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_46]] [[TMP14]], 
%struct.anon.46* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.46*)* @__omp_outlined__48 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void 
@__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__48 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.46* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.46*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -22477,51 +24821,56 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.46* [[__CONTEXT]], %struct.anon.46** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.46*, %struct.anon.46** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_46:%.*]], %struct.anon.46* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_46]], %struct.anon.46* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], 
align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_47:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22531,7 +24880,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// 
CHECK-32-EX-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__49(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.47* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -22539,99 +24888,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__49 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.47* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.47*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_48:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_48]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.47* [[__CONTEXT]], %struct.anon.47** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.47*, %struct.anon.47** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* 
[[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__50 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_48]], %struct.anon.48* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_48]], %struct.anon.48* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.48* 
[[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_48]], %struct.anon.48* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.48* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_48]] [[TMP14]], %struct.anon.48* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.48*)* @__omp_outlined__50 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], 
align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__50 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.48* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.48*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -22641,46 +25001,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.48* [[__CONTEXT]], %struct.anon.48** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.48*, %struct.anon.48** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_48:%.*]], %struct.anon.48* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_48]], %struct.anon.48* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// 
CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP234]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP234]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -22694,6 +25058,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_49:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22703,7 +25068,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__51(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.49* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -22711,99 +25076,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__51 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.49* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.49*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_50:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_50]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.49* [[__CONTEXT]], %struct.anon.49** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.49*, 
%struct.anon.49** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: 
[[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__52 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_50]], %struct.anon.50* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_50]], %struct.anon.50* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.50* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_50]], %struct.anon.50* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.50* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_50]] [[TMP14]], %struct.anon.50* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.50*)* @__omp_outlined__52 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__52 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.50* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.50*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -22813,46 +25189,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.50* [[__CONTEXT]], %struct.anon.50** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.50*, 
%struct.anon.50** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_50:%.*]], %struct.anon.50* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_50]], %struct.anon.50* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP237]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP237]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -22866,6 +25246,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_51:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22875,7 +25256,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__53(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.51* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -22883,99 +25264,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__53 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.51* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 
4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.51*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_52:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_52]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.51* [[__CONTEXT]], %struct.anon.51** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.51*, %struct.anon.51** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__54 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_52]], %struct.anon.52* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_52]], %struct.anon.52* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.52* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_52]], %struct.anon.52* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.52* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_52]] [[TMP14]], %struct.anon.52* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.52*)* @__omp_outlined__54 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__54 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* 
noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.52* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.52*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -22985,46 +25377,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.52* [[__CONTEXT]], %struct.anon.52** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.52*, %struct.anon.52** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_52:%.*]], %struct.anon.52* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_52]], %struct.anon.52* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP240]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -23038,6 +25434,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_53:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23047,7 +25444,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__55(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.53* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -23055,99 +25452,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__55 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.53* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.53*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_54:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_54]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.53* [[__CONTEXT]], %struct.anon.53** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.53*, %struct.anon.53** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 
9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__56 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_54]], %struct.anon.54* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_54]], %struct.anon.54* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.54* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_54]], %struct.anon.54* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.54* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_54]] [[TMP14]], %struct.anon.54* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.54*)* @__omp_outlined__56 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: 
-// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__56 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.54* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.54*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -23157,46 +25565,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.54* [[__CONTEXT]], %struct.anon.54** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.54*, %struct.anon.54** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_54:%.*]], %struct.anon.54* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_54]], %struct.anon.54* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP243]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -23210,6 +25622,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_55:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23219,7 +25632,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__57(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.55* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -23227,99 +25640,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__57 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.55* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.55*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_56:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_56]], align 
8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.55* [[__CONTEXT]], %struct.anon.55** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.55*, %struct.anon.55** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__58 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_56]], %struct.anon.56* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_56]], %struct.anon.56* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.56* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_56]], %struct.anon.56* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.56* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_56]] [[TMP14]], %struct.anon.56* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.56*)* @__omp_outlined__58 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], 
i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__58 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.56* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.56*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -23329,51 +25753,56 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.56* [[__CONTEXT]], %struct.anon.56** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.56*, %struct.anon.56** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_56:%.*]], %struct.anon.56* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_56]], %struct.anon.56* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_57:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23383,7 +25812,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__59(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.57* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -23391,99 +25820,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__59 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.57* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.57*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_58:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_58]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.57* [[__CONTEXT]], %struct.anon.57** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.57*, %struct.anon.57** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__60 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_58]], %struct.anon.58* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_58]], %struct.anon.58* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.58* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_58]], %struct.anon.58* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.58* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_58]] [[TMP14]], %struct.anon.58* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.58*)* @__omp_outlined__60 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__60 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.58* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.58*, align 4 // CHECK-32-EX-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -23493,61 +25933,66 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.58* [[__CONTEXT]], %struct.anon.58** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.58*, %struct.anon.58** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_58:%.*]], %struct.anon.58* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_58]], %struct.anon.58* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_59:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23557,7 +26002,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__61(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.59* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void 
@__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -23565,99 +26010,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__61 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.59* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.59*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_60:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_60]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.59* [[__CONTEXT]], %struct.anon.59** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.59*, %struct.anon.59** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 
// CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__62 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_60]], %struct.anon.60* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_60]], %struct.anon.60* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.60* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_60]], %struct.anon.60* 
[[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.60* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_60]] [[TMP14]], %struct.anon.60* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.60*)* @__omp_outlined__62 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: 
omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__62 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.60* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.60*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -23667,51 +26123,56 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.60* [[__CONTEXT]], %struct.anon.60** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.60*, %struct.anon.60** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_60:%.*]], %struct.anon.60* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_60]], %struct.anon.60* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* 
[[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_61:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23721,7 +26182,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // 
CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__63(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.61* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -23729,99 +26190,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__63 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.61* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.61*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_62:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_62]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.61* [[__CONTEXT]], %struct.anon.61** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.61*, %struct.anon.61** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void 
@__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__64 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_62]], %struct.anon.62* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_62]], %struct.anon.62* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store 
i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.62* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_62]], %struct.anon.62* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.62* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_62]] [[TMP14]], %struct.anon.62* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.62*)* @__omp_outlined__64 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: 
store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__64 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.62* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.62*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -23831,46 +26303,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.62* [[__CONTEXT]], %struct.anon.62** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.62*, %struct.anon.62** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_62:%.*]], %struct.anon.62* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_62]], %struct.anon.62* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // 
CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741862, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP246]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -23884,6 +26360,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_63:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23893,7 +26370,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__65(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.63* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -23901,99 +26378,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__65 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.63* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.63*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_64:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_64]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store 
%struct.anon.63* [[__CONTEXT]], %struct.anon.63** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.63*, %struct.anon.63** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// 
CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__66 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_64]], %struct.anon.64* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_64]], %struct.anon.64* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.64* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_64]], %struct.anon.64* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.64* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_64]] [[TMP14]], %struct.anon.64* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.64*)* @__omp_outlined__66 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__66 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.64* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.64*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24003,46 +26491,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store 
%struct.anon.64* [[__CONTEXT]], %struct.anon.64** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.64*, %struct.anon.64** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_64:%.*]], %struct.anon.64* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_64]], %struct.anon.64* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741861, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP249]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -24056,6 +26548,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_65:%.*]], align 1 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -24065,7 +26558,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__67(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.65* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24073,99 +26566,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__67 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.65* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: 
// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.65*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_66:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_66]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.65* [[__CONTEXT]], %struct.anon.65** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.65*, %struct.anon.65** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // 
CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__68 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_66]], %struct.anon.66* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_66]], %struct.anon.66* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.66* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_66]], %struct.anon.66* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.66* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_66]] [[TMP14]], %struct.anon.66* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.66*)* @__omp_outlined__68 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__68 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.66* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.66*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24175,46 +26679,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.66* [[__CONTEXT]], %struct.anon.66** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.66*, %struct.anon.66** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_66:%.*]], %struct.anon.66* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_66]], %struct.anon.66* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void 
@__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741859, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP252]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -24228,6 +26736,7 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155 // CHECK-32-EX-SAME: () #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_67:%.*]], align 1 // CHECK-32-EX-NEXT: 
[[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -24237,7 +26746,7 @@ // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__69(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], %struct.anon.67* [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24245,99 +26754,110 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__69 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.67* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.67*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_68:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_68]], align 8 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.67* [[__CONTEXT]], %struct.anon.67** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.67*, %struct.anon.67** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 
[[NVPTX_NUM_THREADS]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32)* @__omp_outlined__70 to i8*), i8* null, i8** [[TMP12]], i32 2) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_68]], %struct.anon.68* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_LB]], i32** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_68]], %struct.anon.68* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i32* [[DOTCAPTURED_OMP_PREVIOUS_UB]], i32** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast %struct.anon.68* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP11]], i8* [[TMP12]]) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load [[STRUCT_ANON_68]], %struct.anon.68* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP13]] to %struct.anon.68* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_68]] [[TMP14]], %struct.anon.68* [[TMP15]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.68*)* @__omp_outlined__70 to i8*), i8* null, i8* [[TMP13]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP12]], i32 8) // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], 9 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK-32-EX: cond.true5: // CHECK-32-EX-NEXT: br label [[COND_END7:%.*]] // CHECK-32-EX: cond.false6: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END7]] // CHECK-32-EX: cond.end7: -// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP20]], [[COND_FALSE6]] ] +// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK-32-EX-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__70 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.68* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.68*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24347,46 +26867,50 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.68* [[__CONTEXT]], %struct.anon.68** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.68*, %struct.anon.68** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_68:%.*]], %struct.anon.68* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_68]], %struct.anon.68* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_LB]], 
align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 1073741860, i32 [[TMP7]], i32 [[TMP8]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP255]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -24401,7 +26925,8 @@ // CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_69:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_69]], align 8 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -24409,11 +26934,17 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__71 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.69* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_69]], %struct.anon.69* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.69* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_69]] [[TMP5]], %struct.anon.69* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.69*)* @__omp_outlined__71 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void 
@__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24421,10 +26952,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__71 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.69* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.69*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24434,80 +26966,89 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.69* [[__CONTEXT]], %struct.anon.69** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.69*, %struct.anon.69** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, 
i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] 
// CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_70:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_70]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__72 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.70* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_70]], %struct.anon.70* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.70* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_70]] [[TMP5]], %struct.anon.70* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.70*)* @__omp_outlined__72 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24515,10 +27056,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__72 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.70* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.70*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24528,63 +27070,72 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* 
[[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.70* [[__CONTEXT]], %struct.anon.70** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.70*, %struct.anon.70** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // 
CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_71:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_71]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__73 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.71* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_71]], %struct.anon.71* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.71* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_71]] [[TMP5]], %struct.anon.71* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.71*)* @__omp_outlined__73 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24592,10 +27143,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__73 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.71* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, 
align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.71*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24605,80 +27157,89 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.71* [[__CONTEXT]], %struct.anon.71** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.71*, %struct.anon.71** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], 
[[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_72:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_72]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__74 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.72* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_72]], %struct.anon.72* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.72* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_72]] [[TMP5]], %struct.anon.72* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.72*)* @__omp_outlined__74 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24686,10 +27247,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__74 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.72* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.72*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24699,38 +27261,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.72* [[__CONTEXT]], %struct.anon.72** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.72*, %struct.anon.72** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] 
= load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP258]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP259:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -24744,14 +27308,21 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_73:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_73]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__75 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.73* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_73]], %struct.anon.73* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.73* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_73]] [[TMP5]], %struct.anon.73* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.73*)* @__omp_outlined__75 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24759,10 +27330,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__75 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.73* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.73*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24772,38 +27344,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.73* [[__CONTEXT]], %struct.anon.73** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.73*, %struct.anon.73** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* 
[[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// 
CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP261]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP262:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -24817,14 +27391,21 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_74:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_74]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__76 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.74* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_74]], %struct.anon.74* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.74* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_74]] [[TMP5]], %struct.anon.74* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.74*)* @__omp_outlined__76 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24832,10 +27413,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__76 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.74* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.74*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24845,38 +27427,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.74* [[__CONTEXT]], %struct.anon.74** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.74*, %struct.anon.74** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-EX-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP264]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -24890,14 +27474,21 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_75:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_75]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__77 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.75* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_75]], %struct.anon.75* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.75* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_75]] [[TMP5]], %struct.anon.75* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.75*)* @__omp_outlined__77 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24905,10 +27496,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__77 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.75* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.75*, align 4 // CHECK-32-EX-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24918,38 +27510,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.75* [[__CONTEXT]], %struct.anon.75** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.75*, %struct.anon.75** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP267]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP268:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -24964,7 +27558,8 @@ // CHECK-32-EX-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_76:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_76]], align 8 // CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK-32-EX-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -24972,11 +27567,17 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__78 to i8*), i8* null, i8** [[TMP4]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.76* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_76]], %struct.anon.76* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.76* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_76]] [[TMP5]], %struct.anon.76* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.76*)* @__omp_outlined__78 to i8*), i8* null, i8* [[TMP4]]) +// 
CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -24984,10 +27585,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__78 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.76* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.76*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -24997,88 +27599,97 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.76* [[__CONTEXT]], %struct.anon.76** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.76*, %struct.anon.76** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP270]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP271:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_77:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_77]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__79 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.77* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_77]], %struct.anon.77* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.77* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_77]] [[TMP5]], %struct.anon.77* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.77*)* 
@__omp_outlined__79 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25086,10 +27697,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__79 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.77* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.77*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25099,71 +27711,80 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.77* [[__CONTEXT]], %struct.anon.77** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.77*, %struct.anon.77** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP273]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP274:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_78:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_78]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__80 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.78* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_78]], %struct.anon.78* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.78* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_78]] [[TMP5]], %struct.anon.78* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.78*)* @__omp_outlined__80 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25171,10 +27792,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__80 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.78* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.78*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25184,88 +27806,97 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.78* [[__CONTEXT]], %struct.anon.78** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.78*, %struct.anon.78** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: 
store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: 
omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP276]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP277:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: 
ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_79:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_79]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__81 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.79* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_79]], %struct.anon.79* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.79* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_79]] [[TMP5]], %struct.anon.79* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.79*)* @__omp_outlined__81 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25273,10 +27904,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__81 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.79* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.79*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25286,38 +27918,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.79* [[__CONTEXT]], %struct.anon.79** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.79*, %struct.anon.79** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* 
[[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP279]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP279]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP280:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -25325,28 +27959,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_80:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_80]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__82 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.80* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_80]], %struct.anon.80* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.80* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_80]] [[TMP5]], %struct.anon.80* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.80*)* @__omp_outlined__82 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25354,10 +27995,11 @@ // // // CHECK-32-EX-LABEL: define 
{{[^@]+}}@__omp_outlined__82 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.80* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.80*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25367,38 +28009,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.80* [[__CONTEXT]], %struct.anon.80** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.80*, %struct.anon.80** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP282]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP283:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -25406,28 +28050,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_81:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_81]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = 
bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__83 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.81* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_81]], %struct.anon.81* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.81* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_81]] [[TMP5]], %struct.anon.81* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.81*)* @__omp_outlined__83 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25435,10 +28086,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__83 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.81* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.81*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25448,38 +28100,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.81* [[__CONTEXT]], %struct.anon.81** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.81*, %struct.anon.81** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 
@__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP285]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP286:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -25487,28 +28141,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_82:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_82]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__84 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.82* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_82]], %struct.anon.82* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.82* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_82]] [[TMP5]], %struct.anon.82* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.82*)* @__omp_outlined__84 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25516,10 +28177,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__84 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.82* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.82*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25529,38 +28191,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.82* [[__CONTEXT]], %struct.anon.82** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.82*, %struct.anon.82** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group 
[[ACC_GRP288]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP288]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP289:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -25568,28 +28232,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_83:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_83]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__85 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.83* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_83]], %struct.anon.83* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.83* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_83]] [[TMP5]], %struct.anon.83* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.83*)* @__omp_outlined__85 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25597,10 +28268,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__85 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.83* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.83*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25610,68 +28282,77 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.83* [[__CONTEXT]], %struct.anon.83** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.83*, %struct.anon.83** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 65, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 
[[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP291]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP291]] -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP291]] +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP291]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP292:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214 // CHECK-32-EX-SAME: () #[[ATTR8]] 
{ // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_84:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_84]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__86 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.84* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_84]], %struct.anon.84* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.84* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_84]] [[TMP5]], %struct.anon.84* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.84*)* @__omp_outlined__86 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25679,10 +28360,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__86 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.84* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.84*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25692,71 +28374,80 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.84* [[__CONTEXT]], %struct.anon.84** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.84*, %struct.anon.84** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], 
align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP294]] -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP294]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP295:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_85:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_85]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__87 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.85* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_85]], %struct.anon.85* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.85* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_85]] [[TMP5]], %struct.anon.85* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.85*)* @__omp_outlined__87 to i8*), 
i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25764,10 +28455,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__87 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.85* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.85*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25777,88 +28469,97 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.85* [[__CONTEXT]], %struct.anon.85** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.85*, %struct.anon.85** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], 
align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP297]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP298:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_86:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_86]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__88 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.86* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_86]], %struct.anon.86* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.86* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_86]] [[TMP5]], %struct.anon.86* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, 
%struct.anon.86*)* @__omp_outlined__88 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25866,10 +28567,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__88 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.86* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.86*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25879,38 +28581,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.86* [[__CONTEXT]], %struct.anon.86** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.86*, %struct.anon.86** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP300]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP301:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -25918,28 +28622,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_87:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_87]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 
false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__89 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.87* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_87]], %struct.anon.87* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.87* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_87]] [[TMP5]], %struct.anon.87* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.87*)* @__omp_outlined__89 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -25947,10 +28658,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__89 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.87* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.87*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -25960,38 +28672,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.87* [[__CONTEXT]], %struct.anon.87** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.87*, %struct.anon.87** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP303]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP304:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -25999,28 +28713,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_88:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_88]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__90 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.88* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_88]], %struct.anon.88* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.88* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_88]] [[TMP5]], %struct.anon.88* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.88*)* @__omp_outlined__90 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26028,10 +28749,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__90 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.88* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca %struct.anon.88*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26041,38 +28763,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.88* [[__CONTEXT]], %struct.anon.88** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.88*, %struct.anon.88** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] 
= mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP306]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP307:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -26080,28 +28804,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_89:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_89]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__91 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.89* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// 
CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_89]], %struct.anon.89* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.89* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_89]] [[TMP5]], %struct.anon.89* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.89*)* @__omp_outlined__91 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26109,10 +28840,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__91 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.89* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.89*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26122,38 +28854,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.89* [[__CONTEXT]], %struct.anon.89** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.89*, %struct.anon.89** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP309]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP310:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -26161,28 +28895,35 @@ // CHECK-32-EX: omp.dispatch.inc: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_90:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_90]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__92 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.90* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_90]], %struct.anon.90* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.90* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_90]] [[TMP5]], %struct.anon.90* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.90*)* @__omp_outlined__92 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26190,10 +28931,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__92 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.90* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.90*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26203,80 +28945,89 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.90* [[__CONTEXT]], %struct.anon.90** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.90*, %struct.anon.90** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: 
store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// 
CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_91:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_91]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__93 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.91* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// 
CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_91]], %struct.anon.91* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.91* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_91]] [[TMP5]], %struct.anon.91* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.91*)* @__omp_outlined__93 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26284,10 +29035,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__93 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.91* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.91*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26297,63 +29049,72 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.91* [[__CONTEXT]], %struct.anon.91** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.91*, %struct.anon.91** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK-32-EX: omp.loop.exit: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_92:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_92]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__94 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.92* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_92]], %struct.anon.92* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.92* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_92]] [[TMP5]], %struct.anon.92* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.92*)* @__omp_outlined__94 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26361,10 +29122,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__94 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.92* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.92*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26374,80 +29136,89 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.92* [[__CONTEXT]], %struct.anon.92** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.92*, %struct.anon.92** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: 
call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK-32-EX-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK-32-EX-NEXT: ret void // // // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_93:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_93]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__95 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.93* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_93]], %struct.anon.93* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.93* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_93]] [[TMP5]], %struct.anon.93* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.93*)* 
@__omp_outlined__95 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26455,10 +29226,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__95 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.93* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.93*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26468,38 +29240,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.93* [[__CONTEXT]], %struct.anon.93** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.93*, %struct.anon.93** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741862, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: 
omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP312]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP313:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -26513,14 +29287,21 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_94:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_94]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__96 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.94* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_94]], %struct.anon.94* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// 
CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.94* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_94]] [[TMP5]], %struct.anon.94* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.94*)* @__omp_outlined__96 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26528,10 +29309,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__96 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.94* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.94*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26541,38 +29323,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.94* [[__CONTEXT]], %struct.anon.94** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.94*, %struct.anon.94** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741861, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = 
load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP315]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP316:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -26586,14 +29370,21 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_95:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_95]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__97 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.95* 
[[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_95]], %struct.anon.95* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.95* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_95]] [[TMP5]], %struct.anon.95* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.95*)* @__omp_outlined__97 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26601,10 +29392,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__97 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.95* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.95*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26614,38 +29406,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.95* [[__CONTEXT]], %struct.anon.95** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.95*, %struct.anon.95** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741859, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 
[[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP318]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP319:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: @@ -26659,14 +29453,21 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268 // CHECK-32-EX-SAME: () #[[ATTR8]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_96:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_96]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = 
bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__98 to i8*), i8* null, i8** [[TMP2]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = bitcast %struct.anon.96* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP2]], i8* [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_96]], %struct.anon.96* [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP4]] to %struct.anon.96* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_96]] [[TMP5]], %struct.anon.96* [[TMP6]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.96*)* @__omp_outlined__98 to i8*), i8* null, i8* [[TMP4]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP3]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -26674,10 +29475,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__98 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.96* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.96*, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -26687,38 +29489,40 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store %struct.anon.96* [[__CONTEXT]], %struct.anon.96** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.96*, %struct.anon.96** [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1073741860, i32 0, i32 9, i32 1, i32 1) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 
@__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) -// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]) +// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], i32* [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] -// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP321]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP322:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -91,7 +91,7 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4 // CHECK1-NEXT: store double 3.000000e+00, ptr [[B]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR9:[0-9]+]] // CHECK1-NEXT: 
ret i32 [[CALL]] // // @@ -109,24 +109,33 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[BAR_B:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: [[TMP1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP3]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP1]], i64 1) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[BAR_A]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[BAR_A]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP1]] to double // CHECK1-NEXT: store double [[CONV]], ptr addrspacecast (ptr addrspace(3) @bar_b to ptr), align 8 -// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -140,7 +149,7 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp 
--- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -33,9 +33,11 @@ // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14 // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -44,16 +46,26 @@ // CHECK-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: store i32 10, ptr [[A]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK-NEXT: [[TMP3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP3]], i64 8) // CHECK-NEXT: store i32 100, ptr [[B]], align 4 // CHECK-NEXT: store i32 1000, ptr [[C]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[B]], ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1 -// CHECK-NEXT: store ptr [[A]], ptr [[TMP4]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG2]], ptr [[TMP8]]) +// CHECK-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_1]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], ptr [[TMP9]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP9]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 16) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4) // CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) @@ -63,16 +75,18 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store i32 1000, ptr [[TMP0]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: store i32 1000, ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // // @@ -86,33 +100,32 @@ // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4:[0-9]+]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 
// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[C1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK-NEXT: store ptr [[C]], ptr [[C1]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10000 -// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 10000 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // // @@ -126,12 +139,8 @@ // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR4]] +// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -32,7 +32,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -49,12 +49,20 @@ // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], 
align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR5:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -62,171 +70,174 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8 +// 
CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK4-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[ARGC]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i64 40, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP2]], i64 40, i1 false) // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], 
[[ADD]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP22]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP18]] to ptr -// CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to ptr -// CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK4-NEXT: store ptr [[C1]], ptr [[TMP28]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP29]], align 8 -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK4-NEXT: [[TMP31:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 [[TMP31]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7) +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK4-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK4-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP28]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP29]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// 
CHECK4-NEXT: store ptr [[ARGC]], ptr [[TMP30]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP32]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK4-NEXT: store ptr [[C]], ptr [[TMP33]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK4-NEXT: [[TMP35:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 56) +// CHECK4-NEXT: [[TMP36:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP35]]) +// CHECK4-NEXT: [[TMP37:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK4-NEXT: store [[STRUCT_ANON_0]] [[TMP37]], ptr [[TMP36]], align 8 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK4-NEXT: [[TMP39:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK4-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP41]], i32 [[TMP39]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP36]]) +// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[TMP35]], i64 56) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK4-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK4-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK4: cond.true12: -// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: br label [[COND_END14:%.*]] -// CHECK4: cond.false13: -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END14]] -// CHECK4: cond.end14: -// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE12]] ], [ [[TMP43]], [[COND_FALSE13]] ] -// CHECK4-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 
4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK4: cond.true10: +// CHECK4-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END12:%.*]] +// CHECK4: cond.false11: +// CHECK4-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END12]] +// CHECK4: cond.end12: +// CHECK4-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE10]] ], [ [[TMP51]], [[COND_FALSE11]] ] +// CHECK4-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP52]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) -// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK4-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP54]]) +// CHECK4-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 +// CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i64 40, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[C]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 40) +// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[C]], i64 40) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -236,103 +247,107 @@ // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], 
align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK4-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK4-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK4-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP2]], i64 40, i1 false) -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP10]], i64 40, i1 false) +// CHECK4-NEXT: 
[[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP15]] -// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[CONV5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK4-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I6]]) #[[ATTR8:[0-9]+]] -// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B4]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[C5]], i64 0, i64 [[IDXPROM13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM17]] -// CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX18]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] -// CHECK4-NEXT: store i32 [[ADD20]], ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR8:[0-9]+]] +// CHECK4-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], 
align 4 +// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 [[IDXPROM]] +// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK4-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[C]], i64 0, i64 [[IDXPROM11]] +// CHECK4-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX12]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD10]], [[CALL13]] +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK4-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP14]], i64 0, i64 [[IDXPROM15]] +// CHECK4-NEXT: [[CALL17:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX16]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[ADD14]], [[CALL17]] +// CHECK4-NEXT: store i32 [[ADD18]], ptr [[TMP8]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK4-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]]) -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK4-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP33]]) +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK4-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[C5]], i64 40, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[C]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] @@ -348,7 +363,7 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // 
CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -365,12 +380,20 @@ // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: // CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] +// CHECK5-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK5-NEXT: ret void // CHECK5: worker.exit: @@ -378,169 +401,172 @@ // // // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK5-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 40) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[ARGC]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 40) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK5-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i32 40, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 [[TMP2]], i32 40, i1 false) // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], 
[ [[TMP20]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP22]], [[ADD]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK5-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP18]] to ptr -// CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr [[B4]], ptr [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr [[C1]], ptr [[TMP26]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK5-NEXT: [[TMP29:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 [[TMP29]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[ARGC]], ptr [[TMP28]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[B]], ptr [[TMP30]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[C]], ptr [[TMP31]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK5-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 28) +// CHECK5-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK5-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK5-NEXT: [[TMP37:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK5-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP39]], i32 [[TMP37]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK5-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i32 28) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK5: cond.true12: -// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: br label 
[[COND_END14:%.*]] -// CHECK5: cond.false13: -// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END14]] -// CHECK5: cond.end14: -// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE12]] ], [ [[TMP41]], [[COND_FALSE13]] ] -// CHECK5-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK5: cond.true10: +// CHECK5-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: br label [[COND_END12:%.*]] +// CHECK5: cond.false11: +// CHECK5-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END12]] +// CHECK5: cond.end12: +// CHECK5-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE10]] ], [ [[TMP49]], [[COND_FALSE11]] ] +// CHECK5-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP50]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) -// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 -// CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP52]]) +// CHECK5-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i32 40, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[C]], i32 40, 
i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: -// CHECK5-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 40) +// CHECK5-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 40) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -550,97 +576,101 @@ // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B3]], ptr align 4 [[TMP2]], i32 40, i1 false) -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 [[TMP10]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP24]], [[TMP25]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR8:[0-9]+]] -// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP17]] -// CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP18]] -// CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX11]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]] -// CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK5-NEXT: store i32 [[ADD16]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR8:[0-9]+]] +// CHECK5-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR8]] +// 
CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 [[TMP27]] +// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[C]], i32 0, i32 [[TMP28]] +// CHECK5-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX9]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD8]], [[CALL10]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP14]], i32 0, i32 [[TMP29]] +// CHECK5-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX12]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD11]], [[CALL13]] +// CHECK5-NEXT: store i32 [[ADD14]], ptr [[TMP8]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK5-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP23]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP33]]) +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK5-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[C4]], i32 40, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[C]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp --- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -414,6 +414,7 @@ // 
CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -428,69 +429,78 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 6, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], ptr [[TMP3]], ptr [[TMP8]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK1-NEXT: [[L:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) -// CHECK1-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4 -// CHECK1-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4 -// CHECK1-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP12]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP12]], i64 40, i1 false) +// CHECK1-NEXT: store ptr [[L]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[B]], align 4 +// CHECK1-NEXT: store ptr [[B]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP19]]) // CHECK1-NEXT: ret void // // @@ -679,38 +689,44 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[L]], ptr [[L_ADDR]], 
align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[TMP0]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[L:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) -// CHECK1-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: 
call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP5]], i64 8, i1 false) +// CHECK1-NEXT: store ptr [[L]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -784,33 +800,38 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], 
align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T]], ptr align 8 [[TMP3]], i64 8, i1 false) +// CHECK1-NEXT: store ptr [[T]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -845,7 +866,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR9:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -871,7 +892,8 @@ // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -882,12 +904,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP9]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i64 16) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void 
// CHECK2: worker.exit: @@ -895,30 +922,31 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L1:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 -// CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[L:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) -// CHECK2-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP5]], i64 8, i1 false) +// CHECK2-NEXT: store ptr [[L]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -982,7 +1010,7 @@ // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr 
[[TMP10]], i32 0, i32 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR9]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1001,7 +1029,8 @@ // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [6 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -1021,23 +1050,28 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP14]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 48) +// CHECK2-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP17]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_1]] [[TMP19]], ptr [[TMP18]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP18]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP17]], i64 48) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1045,73 +1079,70 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L3:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[L:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A10:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) -// CHECK2-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4 -// CHECK2-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4 -// CHECK2-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[ARGC5]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK2-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 4 -// CHECK2-NEXT: store ptr [[A10]], ptr [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP12]], i64 40, i1 false) +// CHECK2-NEXT: store ptr [[L]], ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[B]], align 4 +// CHECK2-NEXT: store ptr [[B]], ptr [[_TMP4]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[C]], ptr [[_TMP5]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP27]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -1120,7 +1151,8 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 
// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 @@ -1129,10 +1161,15 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP7]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i64 8) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1140,25 +1177,27 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[T1:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 -// CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[T:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) -// CHECK2-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T]], ptr align 8 [[TMP3]], i64 8, i1 false) +// CHECK2-NEXT: store ptr [[T]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -1222,7 +1261,7 @@ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 // CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR7:[0-9]+]] +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR9:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1241,7 +1280,8 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [6 x ptr], align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -1261,23 +1301,28 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], 
i64 0, i64 5 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 48) +// CHECK3-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP17]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK3-NEXT: store [[STRUCT_ANON]] [[TMP19]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP18]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP17]], i64 48) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1285,73 +1330,70 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // 
CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK3-NEXT: [[L:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A10:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) -// CHECK3-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[ARGC5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[B6]], align 4 -// CHECK3-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[C8]], align 4 -// CHECK3-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[A10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[ARGC5]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK3-NEXT: store ptr [[TMP17]], 
ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[A10]], ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP11]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP12]], i64 40, i1 false) +// CHECK3-NEXT: store ptr [[L]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARGC]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[B]], align 4 +// CHECK3-NEXT: store ptr [[B]], ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC]], ptr [[TMP20]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 3 +// 
CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP27]]) #[[ATTR9]] // CHECK3-NEXT: ret void // // @@ -1379,7 +1421,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR7]] +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR9]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1405,7 +1447,8 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1416,12 +1459,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK3-NEXT: store [[STRUCT_ANON_1]] [[TMP9]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i64 16) // CHECK3-NEXT: call void 
@__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1429,30 +1477,31 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[L:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) -// CHECK3-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP5]], i64 8, i1 false) +// CHECK3-NEXT: store ptr [[L]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR9]] // CHECK3-NEXT: ret void // // @@ -1461,7 +1510,8 @@ // 
CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 @@ -1470,10 +1520,15 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK3-NEXT: store [[STRUCT_ANON_2]] [[TMP7]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP6]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i64 8) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1481,24 +1536,26 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[T1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[T:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr 
align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) -// CHECK3-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) #[[ATTR7]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T]], ptr align 8 [[TMP3]], i64 8, i1 false) +// CHECK3-NEXT: store ptr [[T]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR9]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp --- a/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp @@ -141,23 +141,14 @@ // CHECK: [[PTRS_GEP:%.+]] = getelementptr {{.+}} [5 x ptr], ptr [[PTRS]], {{.+}} 0, {{.+}} 0 // CHECK: define internal void @{{.+}}omp_loop_ref{{.+}}( -// CHECK: [[BODY:%body.addr]] = alloca ptr -// CHECK: [[TMP:%tmp]] = alloca ptr -// CHECK: [[BODY_REF:%body_ref]] = alloca ptr -// CHECK: [[REF_TMP:%ref.tmp]] = alloca %class.anon.1 -// CHECK: [[TMP8:%tmp.+]] = alloca ptr -// CHECK: [[L0:%.+]] = load ptr, ptr [[BODY]] -// CHECK: store ptr [[L0]], ptr [[TMP]] -// CHECK: [[L5:%.+]] = load ptr, ptr [[TMP]] -// CHECK-NOT [[L6:%.+]] = load ptr, ptr [[TMP]] -// CHECK-NOT [[L7:%.+]] = load ptr, ptr [[TMP]] -// CHECK: store ptr [[REF_TMP]], ptr [[BODY_REF]] -// CHECK:[[L47:%.+]] = load ptr, ptr [[BODY_REF]] -// CHECK: store ptr [[L47]], ptr [[TMP8]] -// CHECK: [[L48:%.+]] = load ptr, ptr [[TMP8]] -// CHECK-NOT: [[L49:%.+]] = load ptr, ptr [[TMP8]] -// CHECK-NOT: [[L50:%.+]] = load ptr, ptr [[TMP8]] -// CHECK: ret void +// CHECK: [[BODY_REF:%.+]] = alloca ptr +// CHECK: [[TMP:%.+]] = alloca ptr +// CHECK: [[BODY_REF2:%.+]] = alloca %class.anon.6 +// CHECK: [[TMP2:%.+]] = alloca ptr +// CHECK: store ptr [[BODY_ARG:%.+]], ptr [[BODY_REF]] +// CHECK: [[REG:%.+]] = load ptr, ptr [[BODY_REF]] +// CHECK: store ptr [[REG]], ptr [[TMP]] +// CHECK: [[REG2:%.+]] = load ptr, ptr [[TMP]] // CHECK: define internal void @{{.+}}xoo{{.+}}( // CHECK: [[FADDR:%f.addr]] = alloca ptr diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -28,13 +28,19 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: 
[[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -42,33 +48,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR10:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z3usev // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: [[TMP1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP3]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP1]], 
i64 1) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR10]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -76,18 +91,21 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z4workv() #[[ATTR8]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @_Z4workv() #[[ATTR10]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -96,21 +114,27 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void 
@__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK2-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -118,33 +142,42 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR10:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_Z3usev // CHECK2-SAME: () #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK2-NEXT: [[TMP1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP3]], ptr [[TMP2]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP1]], i32 1) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK2-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: () #[[ATTR7:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR10]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -152,18 +185,21 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z4workv() #[[ATTR8]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: call void @_Z4workv() #[[ATTR10]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -172,7 +208,7 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -37,7 +37,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) @@ -45,10 +46,15 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR8:[0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -59,26 +65,34 @@ // CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP2]]) #[[ATTR8]] // CHECK1-NEXT: ret void // // @@ -92,26 +106,26 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP3]]) #[[ATTR8]] // CHECK1-NEXT: ret void // // @@ -135,11 +149,9 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -147,7 +159,8 @@ // CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) @@ -155,10 +168,15 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR8:[0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -169,26 +187,34 @@ // CHECK2-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, 
i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP2]]) #[[ATTR8]] // CHECK2-NEXT: ret void // // @@ -202,26 +228,26 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca 
ptr, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP3]]) #[[ATTR8]] // CHECK2-NEXT: ret void // // @@ -245,10 +271,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -77,20 +77,38 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG4:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr 
[[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 1) +// CHECK1-NEXT: [[TMP5:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG2]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP7]], ptr [[TMP6]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i64 1) +// CHECK1-NEXT: [[TMP8:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG4]], ptr [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_3]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void @@ -99,13 +117,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 8 // CHECK1-NEXT: store i32 42, ptr [[A]], align 4 // CHECK1-NEXT: ret void // @@ -120,19 +141,22 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 43, ptr [[A]], align 4 // CHECK1-NEXT: ret void // @@ -147,19 +171,22 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 44, ptr [[A]], align 4 // CHECK1-NEXT: ret void // @@ -174,8 +201,8 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// 
CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -186,7 +213,8 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -197,21 +225,26 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 -// CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_2]] [[TMP5]], ptr [[TMP4]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1000 +// CHECK1-NEXT: [[TMP7:%.*]] = zext i1 [[CMP]] to i32 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP7]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP3]], i64 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 // CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void @@ -220,17 
+253,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 45, ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -244,8 +280,8 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -253,7 +289,8 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -263,11 +300,16 @@ // CHECK1-NEXT: [[A1:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 16 ptr 
@__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) @@ -277,42 +319,44 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call i64 @__kmpc_warp_active_thread_mask() +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK1-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] // CHECK1: omp.critical.loop: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK1: omp.critical.test: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]] -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], [[NVPTX_NUM_THREADS]] +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK1: omp.critical.test: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], [[TMP7]] +// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] // CHECK1: omp.critical.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") // CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK1: omp.critical.sync: -// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[CRITICAL_COUNTER]], align 4 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]] // CHECK1: omp.critical.exit: // CHECK1-NEXT: ret void @@ -328,11 +372,9 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -340,20 +382,38 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG4:%.*]] = 
alloca [[STRUCT_ANON_1]], align 8 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG2]], ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP7]], ptr [[TMP6]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i32 1) +// CHECK2-NEXT: [[TMP8:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG4]], ptr [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_3]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[TMP9]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void @@ -362,13 +422,16 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 42, ptr [[A]], align 4 // CHECK2-NEXT: ret void // @@ -383,19 +446,22 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 43, ptr [[A]], align 4 // CHECK2-NEXT: ret void // @@ -410,19 +476,22 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 44, ptr [[A]], align 4 // CHECK2-NEXT: ret void // @@ -437,8 +506,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3]] // CHECK2-NEXT: ret void // // @@ -449,7 +518,8 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ -460,21 +530,26 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 -// CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP5]], ptr [[TMP4]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1000 +// CHECK2-NEXT: [[TMP7:%.*]] = zext i1 [[CMP]] to i32 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP7]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP3]], i32 1) +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: 
[[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 // CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void @@ -483,17 +558,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 45, ptr [[A]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // @@ -507,8 +585,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3]] // CHECK2-NEXT: ret void // // @@ -516,7 +594,8 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -526,11 +605,16 @@ // CHECK2-NEXT: [[A1:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) @@ -540,42 +624,44 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = call i64 @__kmpc_warp_active_thread_mask() +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK2-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] // CHECK2: omp.critical.loop: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]] -// CHECK2-NEXT: br i1 [[TMP4]], label 
[[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK2: omp.critical.test: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]] -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], [[NVPTX_NUM_THREADS]] +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK2: omp.critical.test: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], [[TMP7]] +// CHECK2-NEXT: br i1 [[TMP8]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] // CHECK2: omp.critical.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") // CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK2: omp.critical.sync: -// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) -// CHECK2-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP3]]) +// CHECK2-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[CRITICAL_COUNTER]], align 4 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]] // CHECK2: omp.critical.exit: // CHECK2-NEXT: ret void @@ -591,10 +677,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -37,7 +37,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: 
[[N_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 @@ -49,14 +50,19 @@ // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK-NEXT: store ptr [[D]], ptr [[TMP5]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP8:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP8]], ptr [[TMP7]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP7]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP6]], i64 16) // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 3 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4) // CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) @@ -66,12 +72,11 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -81,77 
+86,79 @@ // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK: omp.dispatch.cond: -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK: omp.dispatch.body: // 
CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP12]] +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP15]] // CHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK: omp.body.continue: // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK: omp.inner.for.end: // CHECK-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK: omp.dispatch.inc: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK: omp.dispatch.end: -// CHECK-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK-NEXT: ret void // // @@ -165,12 +172,8 @@ // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -149,7 +149,8 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 @@ -158,11 +159,16 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP7]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i64 16) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // 
CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -170,27 +176,28 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK1-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -203,7 +210,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -228,7 +235,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK1-SAME: (i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 
[[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 @@ -307,7 +314,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { +// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR7:[0-9]+]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -319,7 +326,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -358,7 +365,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK1-SAME: (ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 @@ -404,27 +411,33 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK1-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[F1]], ptr [[F]], align 4 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[F]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[F]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 16) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[F]], i64 4) -// CHECK1-NEXT: ret i32 [[TMP4]] +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 -// CHECK1-SAME: () #[[ATTR4]] { +// CHECK1-SAME: () #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -440,7 +453,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -472,30 +485,31 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP6]] // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -504,13 +518,9 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -519,7 +529,8 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 @@ -528,11 +539,16 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr 
[[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP7]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i32 8) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -540,27 +556,28 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4 // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK2-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK2-SAME: () #[[ATTR6:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -573,7 +590,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ 
-598,7 +615,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK2-SAME: (i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 @@ -677,7 +694,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK2-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { +// CHECK2-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR7:[0-9]+]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -689,7 +706,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 @@ -728,7 +745,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK2-SAME: (ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 @@ -774,27 +791,33 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK2-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR7]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[F1]], ptr [[F]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[F]], ptr [[TMP2]], align 4 -// CHECK2-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[F]], ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 8) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[F]], i32 4) -// CHECK2-NEXT: ret i32 [[TMP4]] +// CHECK2-NEXT: ret i32 [[TMP7]] // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 -// CHECK2-SAME: () #[[ATTR4]] { +// CHECK2-SAME: () #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -810,7 +833,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 @@ -842,30 +865,31 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP6]] // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK2-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -874,12 +898,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -55,7 +55,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -63,9 
+64,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -73,20 +79,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -96,7 +104,8 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] 
= alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -108,13 +117,18 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 24) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -122,32 +136,32 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -156,7 +170,8 @@ // CHECK2-SAME: (ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -164,9 +179,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr 
@__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -174,20 +194,22 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK2-NEXT: ret void // // @@ -197,7 +219,8 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -209,13 +232,18 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x 
ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK2-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP9]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 12) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -223,32 +251,32 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -50,7 +50,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -58,9 +59,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) 
// CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -68,20 +74,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -92,7 +100,8 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -106,13 +115,18 @@ // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: call 
void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP9:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP9]], i64 24) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -120,32 +134,32 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 
// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -154,7 +168,8 @@ // CHECK2-SAME: (ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -162,9 +177,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -172,20 +192,22 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK2-NEXT: ret void // // @@ -196,7 +218,8 @@ // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -210,13 +233,18 @@ // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: [[TMP9:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK2-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP9]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP11]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP10]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP9]], i32 12) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr 
@[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -224,32 +252,32 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: ret void // diff --git 
a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
--- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -179,13 +179,19 @@
 // CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
 // CHECK45-64-SAME: () #[[ATTR0:[0-9]+]] {
 // CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+// CHECK45-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK45-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8
 // CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false)
 // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
 // CHECK45-64: user_code.entry:
 // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK45-64-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1)
+// CHECK45-64-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]])
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1
+// CHECK45-64-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1
+// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]])
+// CHECK45-64-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 1)
 // CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2)
 // CHECK45-64-NEXT: ret void
 // CHECK45-64: worker.exit:
@@ -193,12 +199,15 @@
 //
 //
 // CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__
-// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK45-64-NEXT: entry:
 // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK45-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK45-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK45-64-NEXT: ret void
 //
 //
@@ -206,21 +215,23 @@
 // CHECK45-64-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] {
 // CHECK45-64-NEXT: entry:
 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK45-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2
+// CHECK45-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8
 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
 // CHECK45-64-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: // CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK45-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr -// CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 -// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK45-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 +// CHECK45-64-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 2) +// CHECK45-64-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK45-64-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 2 +// CHECK45-64-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 2 +// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP5]]) +// CHECK45-64-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 2) // CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: @@ -228,19 +239,24 @@ // // // CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK45-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK45-64-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK45-64-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// 
CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK45-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK45-64-NEXT: ret void // // @@ -250,9 +266,8 @@ // CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK45-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK45-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -262,21 +277,20 @@ // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK45-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK45-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK45-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr -// CHECK45-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 -// CHECK45-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK45-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK45-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK45-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-64-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK45-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK45-64-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK45-64-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK45-64-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK45-64-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 8 +// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP9]]) +// CHECK45-64-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 16) // CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: @@ -284,30 +298,36 @@ // // // CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK45-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK45-64-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK45-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK45-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK45-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK45-64-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK45-64-NEXT: ret void // @@ -315,13 +335,19 @@ // CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 // CHECK45-32-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-32-NEXT: entry: -// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK45-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK45-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK45-32-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK45-32-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK45-32-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK45-32-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK45-32-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) // CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: @@ -329,12 +355,15 @@ // // // CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK45-32-NEXT: ret void // // @@ -342,21 +371,23 @@ // CHECK45-32-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK45-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 +// CHECK45-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: // CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr -// CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK45-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 +// CHECK45-32-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 2) +// CHECK45-32-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK45-32-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 2 +// CHECK45-32-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 2 +// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP5]]) +// CHECK45-32-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 2) // CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: @@ -364,19 +395,24 @@ // // // CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK45-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK45-32-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK45-32-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK45-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK45-32-NEXT: ret void // // @@ -386,9 +422,8 @@ // CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK45-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 +// CHECK45-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -398,21 +433,20 @@ // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK45-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr -// CHECK45-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 -// CHECK45-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK45-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr -// CHECK45-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK45-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// 
CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK45-32-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK45-32-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK45-32-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP9]]) +// CHECK45-32-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 12) // CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: @@ -420,30 +454,36 @@ // // // CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK45-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK45-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] 
= load i16, ptr [[AA]], align 2 +// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK45-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK45-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK45-32-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK45-32-NEXT: ret void // @@ -451,13 +491,19 @@ // CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 // CHECK45-32-EX-SAME: () #[[ATTR0:[0-9]+]] { // CHECK45-32-EX-NEXT: entry: -// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK45-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK45-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK45-32-EX-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK45-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: @@ -465,12 +511,15 @@ // // // CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK45-32-EX-NEXT: ret void // // @@ -478,21 +527,23 @@ // CHECK45-32-EX-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK45-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 +// CHECK45-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: // CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr -// CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 2) +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 2 +// CHECK45-32-EX-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 2 +// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP5]]) +// CHECK45-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 2) // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: @@ -500,19 +551,24 @@ // // // CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[AA:%.*]] 
= alloca i16, align 2 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK45-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK45-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK45-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK45-32-EX-NEXT: ret void // // @@ -522,9 +578,8 @@ // CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK45-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 +// CHECK45-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -534,21 +589,20 @@ // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr -// CHECK45-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 -// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr -// CHECK45-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr 
[[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK45-32-EX-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK45-32-EX-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP9]]) +// CHECK45-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 12) // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: @@ -556,30 +610,36 @@ // // // CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK45-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK45-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK45-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK45-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK45-32-EX-NEXT: ret void // @@ -587,13 +647,19 @@ // CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 // CHECK-64-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: entry: -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK-64-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK-64-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK-64-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-64-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 1) // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -601,12 +667,15 @@ // // // CHECK-64-LABEL: define 
{{[^@]+}}@__omp_outlined__ -// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-64-NEXT: ret void // // @@ -614,21 +683,23 @@ // CHECK-64-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr -// CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 -// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 2) +// CHECK-64-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK-64-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 2 +// CHECK-64-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 2 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP5]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 2) // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -636,19 +707,24 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK-64-NEXT: ret void // // @@ -658,9 +734,8 @@ // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -670,21 +745,20 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr -// CHECK-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 -// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// 
CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-64-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK-64-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP9]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 16) // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -692,30 +766,36 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK-64-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-64-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-64-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK-64-NEXT: ret void // @@ -723,13 +803,19 @@ // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 // CHECK-32-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-32-NEXT: entry: -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK-32-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -737,12 +823,15 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-32-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK-32-NEXT: ret void // // @@ -750,21 +839,23 @@ // CHECK-32-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr -// CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 -// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 2) +// CHECK-32-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 2 +// CHECK-32-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 2 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP5]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 2) // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -772,19 +863,24 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) 
#[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK-32-NEXT: ret void // // @@ -794,9 +890,8 @@ // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -806,21 +901,20 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr -// CHECK-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr -// CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x 
ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 -// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK-32-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK-32-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP9]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 12) // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -828,30 +922,36 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK-32-NEXT: ret void // @@ -859,13 +959,19 @@ // CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27 // CHECK-32-EX-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-32-EX-NEXT: entry: -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK-32-EX-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -873,12 +979,15 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ -// 
CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK-32-EX-NEXT: ret void // // @@ -886,21 +995,23 @@ // CHECK-32-EX-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 2, i1 false) // CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr -// CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 2) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 2 +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 2 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP5]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 2) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -908,19 +1019,24 @@ // // // CHECK-32-EX-LABEL: define 
{{[^@]+}}@__omp_outlined__1 -// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK-32-EX-NEXT: ret void // // @@ -930,9 +1046,8 @@ // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -942,21 +1057,20 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr -// CHECK-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], 
align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr -// CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK-32-EX-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP9]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 12) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -964,30 +1078,36 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], 
align 4 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK-32-EX-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -101,7 +101,8 @@ // CHECK-64-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 // CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -109,11 +110,16 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* -// CHECK-64-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i64 1) +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store double* [[TMP0]], double** [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 8) +// CHECK-64-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK-64-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK-64-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i64 8) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -121,36 +127,38 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 -// CHECK-64-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-64-NEXT: [[E:%.*]] = alloca double, align 8 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK-64-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 -// CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK-64-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// 
CHECK-64-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-64-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i64 8, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) -// CHECK-64-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 -// CHECK-64-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-64-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 8 +// CHECK-64-NEXT: store double 0.000000e+00, double* [[E]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load double, double* [[E]], align 8 +// CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK-64-NEXT: store double [[ADD]], double* [[E]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast double* [[E]] to i8* +// CHECK-64-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-64-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i64 8, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-64-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-64: .omp.reduction.then: -// CHECK-64-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK-64-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 -// CHECK-64-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] -// CHECK-64-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK-64-NEXT: [[TMP11:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = load double, double* [[E]], align 8 +// CHECK-64-NEXT: [[ADD1:%.*]] = fadd double [[TMP11]], [[TMP12]] +// CHECK-64-NEXT: store double [[ADD1]], double* [[TMP2]], align 8 +// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-64: .omp.reduction.done: // CHECK-64-NEXT: ret void @@ -298,7 +306,8 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 // CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-64-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 // CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, 
i8** [[C_ADDR]], align 8 @@ -308,13 +317,18 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK-64-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP7]], i64 2) +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store float* [[TMP1]], float** [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP7:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP6]], i8* [[TMP7]]) +// CHECK-64-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP8]] to %struct.anon.0* +// CHECK-64-NEXT: store [[STRUCT_ANON_0]] [[TMP9]], %struct.anon.0* [[TMP10]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP8]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP7]], i64 16) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -322,55 +336,56 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 -// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 -// CHECK-64-NEXT: [[C1:%.*]] = alloca i8, align 1 -// CHECK-64-NEXT: [[D2:%.*]] = alloca float, align 4 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 +// CHECK-64-NEXT: [[C:%.*]] = alloca i8, align 1 +// CHECK-64-NEXT: [[D:%.*]] = alloca float, align 4 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store 
i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 -// CHECK-64-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK-64-NEXT: store i8 0, i8* [[C1]], align 1 -// CHECK-64-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK-64-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK-64-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 8 +// CHECK-64-NEXT: store i8 0, i8* [[C]], align 1 +// CHECK-64-NEXT: store float 1.000000e+00, float* [[D]], align 4 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i8, i8* [[C]], align 1 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK-64-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK-64-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK-64-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK-64-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 -// CHECK-64-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK-64-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: store i8* [[C1]], i8** [[TMP6]], align 8 -// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* -// CHECK-64-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-64-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i64 16, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) -// CHECK-64-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 -// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK-64-NEXT: store i8 [[CONV1]], i8* [[C]], align 1 +// CHECK-64-NEXT: [[TMP6:%.*]] = load float, float* [[D]], align 4 +// CHECK-64-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK-64-NEXT: store float [[MUL]], float* [[D]], align 4 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* [[C]], i8** [[TMP9]], align 8 +// CHECK-64-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast float* [[D]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-64-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 2, i64 16, i8* [[TMP12]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-64-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 +// CHECK-64-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-64: .omp.reduction.then: -// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK-64-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK-64-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK-64-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK-64-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK-64-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK-64-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK-64-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 -// CHECK-64-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] -// CHECK-64-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK-64-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK-64-NEXT: [[CONV2:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i8, i8* [[C]], align 1 +// CHECK-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK-64-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK-64-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK-64-NEXT: store i8 [[CONV5]], i8* [[TMP2]], align 1 +// CHECK-64-NEXT: [[TMP17:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load float, float* [[D]], align 4 +// CHECK-64-NEXT: [[MUL6:%.*]] = fmul float [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store float [[MUL6]], float* [[TMP4]], align 4 +// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-64: .omp.reduction.done: // CHECK-64-NEXT: ret void @@ -554,7 +569,8 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-64-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK-64-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 // CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -564,14 +580,18 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-64-NEXT: store i8* 
[[TMP5]], i8** [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP8]], i64 2) +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store i32* [[TMP0]], i32** [[TMP4]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store i16* [[TMP1]], i16** [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = bitcast %struct.anon.1* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-64-NEXT: [[TMP7:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i64 16) +// CHECK-64-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP6]], i8* [[TMP7]]) +// CHECK-64-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP8]] to %struct.anon.1* +// CHECK-64-NEXT: store [[STRUCT_ANON_1]] [[TMP9]], %struct.anon.1* [[TMP10]], align 8 +// CHECK-64-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.1*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP8]]) +// CHECK-64-NEXT: call void @__kmpc_free_shared(i8* [[TMP7]], i64 16) // CHECK-64-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -579,73 +599,74 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK-64-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK-64-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 // CHECK-64-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK-64-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK-64-NEXT: store i32 0, i32* 
[[A1]], align 4 -// CHECK-64-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-64-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK-64-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK-64-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-64-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK-64-NEXT: store i32 [[OR]], i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-64-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK-64-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK-64-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK-64-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-64-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) -// CHECK-64-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK-64-NEXT: store i16 [[CONV2]], i16* [[B]], align 2 +// CHECK-64-NEXT: [[TMP8:%.*]] = load 
i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast i32* [[A]] to i8* +// CHECK-64-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast i16* [[B]] to i8* +// CHECK-64-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-64-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 2, i64 16, i8* [[TMP14]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK-64-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-64: .omp.reduction.then: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-64-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK-64-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK-64-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK-64-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-64-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK-64-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK-64-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK-64: cond.true9: -// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK-64-NEXT: br label [[COND_END11:%.*]] -// CHECK-64: cond.false10: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-64-NEXT: br label [[COND_END11]] -// CHECK-64: cond.end11: -// CHECK-64-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK-64-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[OR3:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK-64-NEXT: store i32 [[OR3]], i32* [[TMP2]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK-64-NEXT: [[CONV4:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-64-NEXT: [[CONV5:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK-64-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK-64: cond.true7: +// CHECK-64-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK-64-NEXT: br label [[COND_END9:%.*]] +// CHECK-64: cond.false8: +// CHECK-64-NEXT: [[TMP22:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-64-NEXT: br label [[COND_END9]] +// CHECK-64: cond.end9: +// CHECK-64-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE7]] ], [ [[TMP22]], [[COND_FALSE8]] ] +// CHECK-64-NEXT: store i16 [[COND10]], i16* [[TMP4]], align 2 +// 
CHECK-64-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-64: .omp.reduction.done: // CHECK-64-NEXT: ret void @@ -833,7 +854,8 @@ // CHECK-32-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 // CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -841,11 +863,16 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* -// CHECK-32-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store double* [[TMP0]], double** [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK-32-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK-32-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -853,36 +880,38 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK-32-NEXT: [[E1:%.*]] = alloca 
double, align 8 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK-32-NEXT: [[E:%.*]] = alloca double, align 8 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK-32-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 -// CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK-32-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) -// CHECK-32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 -// CHECK-32-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// CHECK-32-NEXT: store double 0.000000e+00, double* [[E]], align 8 +// CHECK-32-NEXT: [[TMP3:%.*]] = load double, double* [[E]], align 8 +// CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK-32-NEXT: store double [[ADD]], double* [[E]], align 8 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast double* [[E]] to i8* +// CHECK-32-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 4, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-32-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32: .omp.reduction.then: -// CHECK-32-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK-32-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 -// CHECK-32-NEXT: [[ADD2:%.*]] = 
fadd double [[TMP9]], [[TMP10]] -// CHECK-32-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK-32-NEXT: [[TMP11:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK-32-NEXT: [[TMP12:%.*]] = load double, double* [[E]], align 8 +// CHECK-32-NEXT: [[ADD1:%.*]] = fadd double [[TMP11]], [[TMP12]] +// CHECK-32-NEXT: store double [[ADD1]], double* [[TMP2]], align 8 +// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32: .omp.reduction.done: // CHECK-32-NEXT: ret void @@ -1030,7 +1059,8 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 // CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-32-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 // CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 @@ -1040,13 +1070,18 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK-32-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP7]], i32 2) +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store float* [[TMP1]], float** [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP7:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP6]], i8* [[TMP7]]) +// CHECK-32-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP8]] to %struct.anon.0* +// CHECK-32-NEXT: store [[STRUCT_ANON_0]] [[TMP9]], %struct.anon.0* [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP8]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP7]], i32 8) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -1054,55 +1089,56 @@ // 
// // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK-32-NEXT: [[C1:%.*]] = alloca i8, align 1 -// CHECK-32-NEXT: [[D2:%.*]] = alloca float, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK-32-NEXT: [[C:%.*]] = alloca i8, align 1 +// CHECK-32-NEXT: [[D:%.*]] = alloca float, align 4 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK-32-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-NEXT: store i8 0, i8* [[C1]], align 1 -// CHECK-32-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK-32-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK-32-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 4 +// CHECK-32-NEXT: store i8 0, i8* [[C]], align 1 +// CHECK-32-NEXT: store float 1.000000e+00, float* [[D]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i8, i8* [[C]], align 1 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK-32-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK-32-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK-32-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK-32-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 -// CHECK-32-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK-32-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* -// CHECK-32-NEXT: store i8* [[TMP8]], i8** [[TMP7]], 
align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) -// CHECK-32-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 -// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK-32-NEXT: store i8 [[CONV1]], i8* [[C]], align 1 +// CHECK-32-NEXT: [[TMP6:%.*]] = load float, float* [[D]], align 4 +// CHECK-32-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK-32-NEXT: store float [[MUL]], float* [[D]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* [[C]], i8** [[TMP9]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast float* [[D]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 2, i32 8, i8* [[TMP12]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-32-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 +// CHECK-32-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32: .omp.reduction.then: -// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK-32-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK-32-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK-32-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK-32-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK-32-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK-32-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 -// CHECK-32-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK-32-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i8, i8* [[C]], align 1 +// CHECK-32-NEXT: [[CONV3:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK-32-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK-32-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK-32-NEXT: store i8 [[CONV5]], i8* [[TMP2]], align 1 +// CHECK-32-NEXT: [[TMP17:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load float, float* [[D]], align 4 +// CHECK-32-NEXT: [[MUL6:%.*]] = fmul float [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store float [[MUL6]], float* [[TMP4]], align 4 +// CHECK-32-NEXT: call void 
@__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32: .omp.reduction.done: // CHECK-32-NEXT: ret void @@ -1286,7 +1322,8 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 +// CHECK-32-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK-32-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 // CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 @@ -1296,14 +1333,18 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-32-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store i32* [[TMP0]], i32** [[TMP4]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store i16* [[TMP1]], i16** [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast %struct.anon.1* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-NEXT: [[TMP7:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP6]], i8* [[TMP7]]) +// CHECK-32-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP8]] to %struct.anon.1* +// CHECK-32-NEXT: store [[STRUCT_ANON_1]] [[TMP9]], %struct.anon.1* [[TMP10]], align 4 +// CHECK-32-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.1*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP8]]) +// CHECK-32-NEXT: call void @__kmpc_free_shared(i8* [[TMP7]], i32 8) // CHECK-32-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -1311,73 +1352,74 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], 
i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK-32-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK-32-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK-32-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK-32-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK-32-NEXT: store i32 [[OR]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK-32-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK-32-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x 
i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK-32-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) -// CHECK-32-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK-32-NEXT: store i16 [[CONV2]], i16* [[B]], align 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast i32* [[A]] to i8* +// CHECK-32-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast i16* [[B]] to i8* +// CHECK-32-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 2, i32 8, i8* [[TMP14]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK-32-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32: .omp.reduction.then: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK-32-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK-32-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK-32-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK-32-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK-32: cond.true9: -// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK-32-NEXT: br label [[COND_END11:%.*]] -// CHECK-32: cond.false10: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-NEXT: br label [[COND_END11]] -// CHECK-32: cond.end11: -// CHECK-32-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK-32-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 
2 -// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[OR3:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK-32-NEXT: store i32 [[OR3]], i32* [[TMP2]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK-32-NEXT: [[CONV4:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-NEXT: [[CONV5:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK-32-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK-32: cond.true7: +// CHECK-32-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK-32-NEXT: br label [[COND_END9:%.*]] +// CHECK-32: cond.false8: +// CHECK-32-NEXT: [[TMP22:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-NEXT: br label [[COND_END9]] +// CHECK-32: cond.end9: +// CHECK-32-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE7]] ], [ [[TMP22]], [[COND_FALSE8]] ] +// CHECK-32-NEXT: store i16 [[COND10]], i16* [[TMP4]], align 2 +// CHECK-32-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32: .omp.reduction.done: // CHECK-32-NEXT: ret void @@ -1565,7 +1607,8 @@ // CHECK-32-EX-SAME: (double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 2, i1 false) @@ -1573,11 +1616,16 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP0]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP4]], i8** [[TMP3]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, double*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP5]], i32 1) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store double* [[TMP0]], double** [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = bitcast %struct.anon* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 4) +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP4]], i8* [[TMP5]]) +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// 
CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to %struct.anon* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON]] [[TMP7]], %struct.anon* [[TMP8]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon*)* @__omp_outlined__ to i8*), i8* null, i8* [[TMP6]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP5]], i32 4) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -1585,36 +1633,38 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK-32-EX-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 4 +// CHECK-32-EX-NEXT: [[E:%.*]] = alloca double, align 8 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK-32-EX-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load double, double* [[E1]], align 8 -// CHECK-32-EX-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK-32-EX-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast double* [[E1]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 4, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 -// CHECK-32-EX-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-EX-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load double*, double** [[TMP1]], align 4 +// 
CHECK-32-EX-NEXT: store double 0.000000e+00, double* [[E]], align 8 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load double, double* [[E]], align 8 +// CHECK-32-EX-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK-32-EX-NEXT: store double [[ADD]], double* [[E]], align 8 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast double* [[E]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 4, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32-EX: .omp.reduction.then: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load double, double* [[E1]], align 8 -// CHECK-32-EX-NEXT: [[ADD2:%.*]] = fadd double [[TMP9]], [[TMP10]] -// CHECK-32-EX-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load double, double* [[TMP2]], align 8 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load double, double* [[E]], align 8 +// CHECK-32-EX-NEXT: [[ADD1:%.*]] = fadd double [[TMP11]], [[TMP12]] +// CHECK-32-EX-NEXT: store double [[ADD1]], double* [[TMP2]], align 8 +// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32-EX: .omp.reduction.done: // CHECK-32-EX-NEXT: ret void @@ -1762,7 +1812,8 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 // CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-32-EX-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 // CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 @@ -1772,13 +1823,18 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK-32-EX-NEXT: store i8* [[TMP0]], i8** [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP1]] to i8* -// 
CHECK-32-EX-NEXT: store i8* [[TMP6]], i8** [[TMP5]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i8*, float*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP7]], i32 2) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store float* [[TMP1]], float** [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast %struct.anon.0* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP6]], i8* [[TMP7]]) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_0]], %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP8]] to %struct.anon.0* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_0]] [[TMP9]], %struct.anon.0* [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8* [[TMP8]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP7]], i32 8) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -1786,55 +1842,56 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8* noundef nonnull align 1 dereferenceable(1) [[C:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.0* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK-32-EX-NEXT: [[C1:%.*]] = alloca i8, align 1 -// CHECK-32-EX-NEXT: [[D2:%.*]] = alloca float, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 4 +// CHECK-32-EX-NEXT: [[C:%.*]] = alloca i8, align 1 +// CHECK-32-EX-NEXT: [[D:%.*]] = alloca float, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i8 0, i8* [[C1]], align 1 -// CHECK-32-EX-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK-32-EX-NEXT: store %struct.anon.0* [[__CONTEXT]], %struct.anon.0** 
[[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8*, i8** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load float*, float** [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i8 0, i8* [[C]], align 1 +// CHECK-32-EX-NEXT: store float 1.000000e+00, float* [[D]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i8, i8* [[C]], align 1 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK-32-EX-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK-32-EX-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK-32-EX-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load float, float* [[D2]], align 4 -// CHECK-32-EX-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK-32-EX-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast float* [[D2]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 2, i32 8, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 -// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK-32-EX-NEXT: store i8 [[CONV1]], i8* [[C]], align 1 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load float, float* [[D]], align 4 +// CHECK-32-EX-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK-32-EX-NEXT: store float [[MUL]], float* [[D]], align 4 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i8* [[C]], i8** [[TMP9]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast float* [[D]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 2, i32 8, i8* [[TMP12]], void (i8*, i16, i16, i16)* 
@_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4) +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 +// CHECK-32-EX-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32-EX: .omp.reduction.then: -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK-32-EX-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i8, i8* [[C1]], align 1 -// CHECK-32-EX-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK-32-EX-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK-32-EX-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load float, float* [[D2]], align 4 -// CHECK-32-EX-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP2]], align 1 +// CHECK-32-EX-NEXT: [[CONV2:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i8, i8* [[C]], align 1 +// CHECK-32-EX-NEXT: [[CONV3:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK-32-EX-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK-32-EX-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK-32-EX-NEXT: store i8 [[CONV5]], i8* [[TMP2]], align 1 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load float, float* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load float, float* [[D]], align 4 +// CHECK-32-EX-NEXT: [[MUL6:%.*]] = fmul float [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store float [[MUL6]], float* [[TMP4]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32-EX: .omp.reduction.done: // CHECK-32-EX-NEXT: ret void @@ -2018,7 +2075,8 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 +// CHECK-32-EX-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK-32-EX-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 // CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 @@ -2028,14 +2086,18 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP1]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// 
CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store i32* [[TMP0]], i32** [[TMP4]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store i16* [[TMP1]], i16** [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = bitcast %struct.anon.1* [[DOTTMP_OUTLINED_AGG_ARG]] to i8* +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call align 8 i8* @__kmpc_alloc_shared(i32 8) +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = call i8* @__kmpc_alloc_aggregate_arg(i8* [[TMP6]], i8* [[TMP7]]) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_1]], %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP8]] to %struct.anon.1* +// CHECK-32-EX-NEXT: store [[STRUCT_ANON_1]] [[TMP9]], %struct.anon.1* [[TMP10]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.anon.1*)* @__omp_outlined__5 to i8*), i8* null, i8* [[TMP8]]) +// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(i8* [[TMP7]], i32 8) // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -2043,73 +2105,74 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.1* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.1*, align 4 +// CHECK-32-EX-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK-32-EX-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-EX-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK-32-EX-NEXT: store i32 [[OR]], i32* [[A1]], 
align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK-32-EX-NEXT: store %struct.anon.1* [[__CONTEXT]], %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-EX-NEXT: store i16 -32768, i16* [[B]], align 2 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-EX-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK-32-EX-NEXT: store i32 [[OR]], i32* [[A]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-EX-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK-32-EX-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK-32-EX-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK-32-EX-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK-32-EX-NEXT: store i16 [[CONV2]], i16* [[B]], align 2 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[TMP8]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = bitcast i32* [[A]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = bitcast i16* [[B]] to i8* +// CHECK-32-EX-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 2, i32 8, i8* [[TMP14]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK-32-EX: .omp.reduction.then: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK-32-EX-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK-32-EX-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK-32-EX-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK-32-EX-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK-32-EX: cond.true9: -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK-32-EX-NEXT: br label [[COND_END11:%.*]] -// CHECK-32-EX: cond.false10: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK-32-EX-NEXT: br label [[COND_END11]] -// CHECK-32-EX: cond.end11: -// CHECK-32-EX-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK-32-EX-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-EX-NEXT: [[OR3:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK-32-EX-NEXT: store i32 [[OR3]], i32* [[TMP2]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK-32-EX-NEXT: [[CONV4:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-EX-NEXT: [[CONV5:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK-32-EX: cond.true7: +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP4]], align 2 +// CHECK-32-EX-NEXT: br label [[COND_END9:%.*]] +// CHECK-32-EX: cond.false8: +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i16, i16* [[B]], align 2 +// CHECK-32-EX-NEXT: br label [[COND_END9]] +// CHECK-32-EX: cond.end9: +// CHECK-32-EX-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE7]] ], [ [[TMP22]], [[COND_FALSE8]] ] +// CHECK-32-EX-NEXT: store 
i16 [[COND10]], i16* [[TMP4]], align 2 +// CHECK-32-EX-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK-32-EX: .omp.reduction.done: // CHECK-32-EX-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -33,6 +33,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) @@ -42,7 +43,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -50,10 +51,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -63,9 +65,12 @@ // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 8) @@ -79,68 +84,73 @@ // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[IB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] // CHECK1-NEXT: call 
void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR12:[0-9]+]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP9]], 4 +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 // CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP13]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP14:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP15]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP14]], i64 24) // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[IB]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] @@ -165,18 +175,16 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[THIS1]], ptr nonnull align 4 dereferenceable(4) [[TMP0]], ptr nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[THIS1]], ptr nonnull align 4 dereferenceable(4) [[TMP0]], ptr nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -187,160 +195,162 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = alloca %"class.std::complex", align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[REF_TMP5:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca 
%"class.std::complex", align 4 +// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IV]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, 
!tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_LB]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP6]]) #[[ATTR6]] -// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR12]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP5]]) #[[ATTR6]] +// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP5]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP5]]) #[[ATTR11]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP5]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I7]]) 
#[[ATTR6]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP23]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP22]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label 
[[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP24]], [[ADD10]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP27]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP13]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP28]] to float +// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP14]], align 4, !tbaa [[TBAA14]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to float -// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP16]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to float -// CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR12]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR12]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] 
+// CHECK1-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP29]] to float +// CHECK1-NEXT: store float [[CONV16]], ptr [[REF_TMP15]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP13]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP13]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP13]]) #[[ATTR6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]]) -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP34]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP36]], 1 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP38]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) +// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP40]], 1 +// CHECK1-NEXT: br i1 [[TMP41]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP2]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR12]] -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP34]]) +// CHECK1-NEXT: [[CALL20:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP6]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]]) #[[ATTR11]] +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I7]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] @@ -363,13 +373,13 @@ // CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(ptr nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR12]] +// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(ptr nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, 
!tbaa [[TBAA16:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(ptr nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR12]] +// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(ptr nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -378,7 +388,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -443,7 +453,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -501,7 +511,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -510,21 +520,16 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 // CHECK1-SAME: () #[[ATTR0]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true) @@ -534,7 +539,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -542,10 +547,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -555,9 +561,12 @@ // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 16) @@ -571,68 +580,73 @@ // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[IB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], 
ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 
4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP9]], 4 +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 // CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP13]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP14:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[TMP15]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP14]], i64 24) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[IB]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // 
CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] @@ -657,18 +671,16 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[THIS1]], ptr nonnull align 8 dereferenceable(8) [[TMP0]], ptr nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[THIS1]], ptr nonnull align 8 dereferenceable(8) [[TMP0]], ptr nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -679,160 +691,162 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = alloca %"class.std::complex.2", align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[REF_TMP5:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca %"class.std::complex.2", align 8 +// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 
8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IV]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], 
[[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_LB]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP6]]) #[[ATTR6]] -// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR12]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP5]]) #[[ATTR6]] +// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP5]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP5]]) #[[ATTR11]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP5]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I7]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP23]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP22]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP24]], [[ADD10]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP27]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[REF_TMP13]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP28]] to double +// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP14]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to double -// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP16]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR12]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR12]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK1-NEXT: store double [[CONV16]], ptr [[REF_TMP15]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP13]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP13]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[REF_TMP13]]) #[[ATTR6]] // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]]) -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP34]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func5, ptr @_omp_reduction_inter_warp_copy_func6) -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i32 [[TMP36]], 1 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP38]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func5, ptr @_omp_reduction_inter_warp_copy_func6) +// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP40]], 1 +// CHECK1-NEXT: br i1 [[TMP41]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[TMP2]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR12]] -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP34]]) +// CHECK1-NEXT: [[CALL20:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[TMP6]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]]) #[[ATTR11]] +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I7]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] @@ -855,14 +869,14 @@ // CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(ptr nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR12]] -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(ptr nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(ptr nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR12]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(ptr nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK1-NEXT: 
[[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] // CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[TBAA26]] @@ -870,14 +884,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.2", align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2, !tbaa [[TBAA19]] @@ -889,7 +903,7 @@ // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex.0", ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex.2", ptr [[TMP9]], i64 1 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK1: .shuffle.pre_cond: // CHECK1-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] @@ -948,7 +962,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1006,7 +1020,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1015,15 +1029,9 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] 
= load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -1080,11 +1088,11 @@ // CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[TBAA26]] @@ -1097,7 +1105,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: ret double [[TMP0]] // @@ -1108,7 +1116,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK1-NEXT: ret double [[TMP0]] // diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -53,7 +53,7 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -62,12 +62,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[A_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP3]], ptr [[TMP2]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -75,15 +75,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i8 49, ptr [[A_ADDR]], align 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A]], align 1 +// CHECK1-NEXT: store i8 49, ptr [[A]], align 1 // CHECK1-NEXT: ret void // // @@ -91,7 +96,7 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -100,12 +105,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], 
ptr [[TMP2]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -113,15 +118,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i16 1, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: store i16 1, ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -129,7 +139,7 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 2 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -138,12 +148,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr 
@[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -151,53 +161,74 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[AA]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_2]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, 
align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i16 1, ptr [[TMP0]], align 2 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i16 1, ptr [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -205,7 +236,7 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -214,12 +245,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 -// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[A_ADDR]], align 1 +// CHECK2-NEXT: store i8 [[TMP3]], ptr [[TMP2]], align 1 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -227,15 +258,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i8, align 1 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i8 49, ptr [[A_ADDR]], align 1 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A]], align 1 +// CHECK2-NEXT: store i8 49, ptr [[A]], align 1 // CHECK2-NEXT: ret void // // @@ -243,7 +279,7 @@ // CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ -252,12 +288,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -265,15 +301,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store i16 1, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK2-NEXT: store i16 1, ptr [[AA]], align 2 // CHECK2-NEXT: ret void // // @@ -281,7 +322,7 @@ // CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 2 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ -290,12 +331,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -303,52 +344,73 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[AA]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP6]], ptr [[TMP5]], align 4 
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store i16 1, ptr [[TMP0]], align 2 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i16 1, ptr [[TMP2]], align 2 // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -35,6 +35,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) @@ -44,7 +45,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -52,83 +53,94 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[I:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[I]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP12]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP10]], i64 8) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add 
nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -142,17 +154,16 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true) @@ -162,7 +173,7 @@ // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -170,83 +181,94 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: [[I:%.*]] = call align 8 ptr 
@__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[I]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr 
[[I]], ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP10]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP12]], ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP11]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP10]], i32 4) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[I]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -260,10 +282,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // 
CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR5]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -77,8 +77,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -90,15 +89,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -106,13 +107,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -122,129 +123,137 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK1-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add 
nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP48]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP53]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -254,15 +263,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -275,109 +284,119 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw 
i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP40]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -390,7 +409,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -401,12 +420,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -414,12 +435,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -429,129 +450,139 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: 
omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP29]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, 
ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP30]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP29]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -564,56 +595,64 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr 
[[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 // CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 // CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2 @@ -621,17 +660,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -641,6 +680,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -650,9 +690,11 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: 
user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -660,106 +702,118 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
[[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_5]] [[TMP20]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP19]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 24) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = 
phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -769,56 +823,62 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: 
omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -827,7 +887,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -838,12 +898,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -851,12 +913,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -864,106 +926,116 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // 
CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP22]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_8]] [[TMP24]], ptr [[TMP23]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP23]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP22]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP31]], 99 // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK1: cond.true6: // CHECK1-NEXT: br label [[COND_END8:%.*]] // CHECK1: cond.false7: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END8]] // CHECK1: cond.end8: -// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -976,75 +1048,83 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], 
align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 
[[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 // CHECK1-NEXT: store i32 10, ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1053,7 +1133,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1064,12 +1144,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1077,12 +1159,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1095,25 +1177,31 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_11]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -1121,116 +1209,120 @@ // CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK1-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP19]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// 
CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr -// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP30]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_11]] [[TMP32]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP34]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[TMP31]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP30]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr 
[[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] // CHECK1: cond.true18: -// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: br label [[COND_END20:%.*]] // CHECK1: cond.false19: -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END20]] // CHECK1: cond.end20: -// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE18]] ], [ [[TMP39]], [[COND_FALSE19]] ] +// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE18]] ], [ [[TMP44]], [[COND_FALSE19]] ] // CHECK1-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP40]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP42]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP47]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1247,21 +1339,29 @@ // CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: 
[[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -1269,90 +1369,90 @@ // CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// 
CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] // CHECK1-NEXT: [[CONV15:%.*]] = sext i32 [[MUL14]] to i64 -// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i64 [[TMP15]], [[CONV15]] +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i64 [[TMP23]], [[CONV15]] // CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK1-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 // CHECK1-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK1-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 -// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP18]], [[CONV22]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP26]], [[CONV22]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK1-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK1-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK1-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK1-NEXT: [[MUL28:%.*]] = mul nsw i64 [[DIV23]], [[CONV27]] -// CHECK1-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP17]], [[MUL28]] +// CHECK1-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP25]], [[MUL28]] // CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK1-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK1-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 // CHECK1-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[I9]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM34]] // CHECK1-NEXT: store i32 [[ADD33]], ptr [[ARRAYIDX35]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1364,7 +1464,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1376,13 +1476,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], 
ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1390,13 +1494,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1406,134 +1509,143 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_13]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], 
[[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_13]] [[TMP34]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[TMP33]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP32]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE10]] ], [ [[TMP46]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1546,76 
+1658,84 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] +// CHECK1-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1627,8 +1747,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1640,15 +1759,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], 
align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1656,13 +1777,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1672,129 +1793,137 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// 
CHECK2-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK2-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// CHECK2-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 
4 +// CHECK2-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK2-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], 
[[TMP43]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK2-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK2-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP48]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP53]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1804,15 +1933,15 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1825,109 +1954,119 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 
1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] // CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br 
label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK2-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP40]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1940,7 +2079,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1951,12 +2090,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1964,12 +2105,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1979,129 +2120,139 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// 
CHECK2-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP29]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP31]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP30]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP29]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// 
CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) +// CHECK2-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2114,56 +2265,64 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 // CHECK2-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 // CHECK2-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2 @@ -2171,17 +2330,17 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK2-NEXT: 
call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -2191,6 +2350,7 @@ // CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -2200,9 +2360,11 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2210,106 +2372,118 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// 
CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr 
[[TMP12]], ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK2-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK2-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]) +// CHECK2-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_5]] [[TMP20]], ptr [[TMP19]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP19]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 24) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2319,56 +2493,62 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -2377,7 +2557,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -2388,12 +2568,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK2-NEXT: call void 
@__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2401,12 +2583,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2414,106 +2596,116 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 
4) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK2-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP22]]) +// CHECK2-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_8]] [[TMP24]], ptr [[TMP23]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP23]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP22]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP31]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: -// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2526,75 +2718,83 @@ // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// 
CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 // CHECK2-NEXT: store i32 10, ptr [[K]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], 
[[TMP26]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) // CHECK2-NEXT: ret void // // @@ -2603,7 +2803,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -2614,12 +2814,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2627,12 +2829,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2645,141 +2847,151 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_11]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] // CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 // CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK2: 
omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr -// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr -// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr -// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK2-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_11]] [[TMP34]], ptr [[TMP33]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[TMP33]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP32]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK2-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]] // CHECK2: cond.true16: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END18:%.*]] // CHECK2: cond.false17: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END18]] // CHECK2: cond.end18: -// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE16]] ], [ [[TMP41]], [[COND_FALSE17]] ] +// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE16]] ], [ [[TMP46]], [[COND_FALSE17]] ] // CHECK2-NEXT: store i32 [[COND19]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// 
CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2796,108 +3008,116 @@ // CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] // CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 // CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP17]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV8]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], 
i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[CONV11]], [[TMP14]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[CONV11]], [[TMP22]] // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK2-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] -// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[TMP15]], [[MUL15]] +// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[TMP23]], [[MUL15]] // CHECK2-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL17]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I9]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 // CHECK2-NEXT: [[MUL20:%.*]] = mul nsw i32 1, [[DIV19]] -// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP18]], [[MUL20]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP26]], [[MUL20]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK2-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 // CHECK2-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] // CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV21]], 
[[MUL24]] -// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP17]], [[MUL25]] +// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP25]], [[MUL25]] // CHECK2-NEXT: [[MUL27:%.*]] = mul nsw i32 [[SUB26]], 1 // CHECK2-NEXT: [[ADD28:%.*]] = add nsw i32 0, [[MUL27]] // CHECK2-NEXT: store i32 [[ADD28]], ptr [[J10]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP32]] to i64 // CHECK2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM30]] // CHECK2-NEXT: store i32 [[ADD29]], ptr [[ARRAYIDX31]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -2909,7 +3129,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -2921,13 +3141,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2935,13 +3159,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2951,134 +3174,143 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_13]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], 
align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
[[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK2-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_13]] [[TMP34]], ptr [[TMP33]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[TMP33]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP32]], i64 40) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 
[[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ] +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE10]] ], [ [[TMP46]], [[COND_FALSE11]] ] // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3091,76 +3323,84 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] 
= load ptr, ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]] -// CHECK2-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM7]] +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -3172,8 +3412,7 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -3185,15 +3424,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], 
align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3201,13 +3442,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3217,127 +3458,135 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK3-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr 
inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20) +// CHECK3-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP31]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_0]] [[TMP33]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP32]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP31]], i32 20) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] // CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END13:%.*]] // CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END13]] // CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE11]] ], [ [[TMP45]], [[COND_FALSE12]] ] // CHECK3-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP48]]) +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK3-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[TMP46]], ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, ptr [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP51]], ptr [[L]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -3347,15 +3596,15 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3368,104 +3617,114 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// 
CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP4]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP29]] // CHECK3-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[L]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP40]], ptr [[L]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -3478,7 +3737,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -3489,12 +3748,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3502,12 +3763,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3517,127 +3778,137 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr 
[[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP28:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP27]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_2]] [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP28]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP27]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]]) +// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3650,52 +3921,60 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 // CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2 @@ -3703,17 +3982,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void 
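The new CHECK3 lines in the hunks above all exercise the same convention: rather than filling a `[N x ptr]` `captured_vars_addrs` array, the caller packs the captured values by value into an anonymous aggregate, obtains device-shared storage for it with `__kmpc_alloc_shared` and `__kmpc_alloc_aggregate_arg`, stores the aggregate, and hands the single pointer to `__kmpc_parallel_51`; the outlined function then unpacks its `__context` parameter field by field. For readers skimming the checks, the following is a minimal C sketch of that flow. The struct layout, the stand-in runtime functions, and all names (`ctx_t`, `kmpc_alloc_*`, `outlined`) are illustrative only and are not taken from the patch.

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Host stand-ins for the device runtime calls named in the checks, so the
 * sketch is self-contained; on the GPU these come from the OpenMP runtime. */
static void *kmpc_alloc_shared(size_t size) { return malloc(size); }
static void kmpc_free_shared(void *ptr, size_t size) { (void)size; free(ptr); }
static void *kmpc_alloc_aggregate_arg(void *local_tmp, void *shared) {
  (void)local_tmp;           /* assumed: the shared buffer is used when present */
  return shared;
}

/* Made-up aggregate mirroring %struct.anon.2 in the checks: previous
 * distribute bounds plus the captured 'n' and 'aa'. */
typedef struct {
  int prev_lb, prev_ub, n;
  short *aa;
} ctx_t;

/* Unpacking side, i.e. what the outlined function's prologue now does with
 * its single __context pointer. */
static void outlined(void *context) {
  const ctx_t *ctx = context;
  printf("lb=%d ub=%d n=%d aa[0]=%d\n", ctx->prev_lb, ctx->prev_ub, ctx->n,
         ctx->aa[0]);
}

int main(void) {
  short aa[1000] = {7};
  ctx_t packed = {0, 99, 1000, aa};          /* omp.outlined.arg.agg.  */
  ctx_t local_tmp;                           /* .tmp.outlined.agg.arg  */

  void *shared = kmpc_alloc_shared(sizeof(ctx_t));
  ctx_t *arg = kmpc_alloc_aggregate_arg(&local_tmp, shared);
  memcpy(arg, &packed, sizeof(ctx_t));       /* store of %struct.anon.2 */

  outlined(arg);                             /* __kmpc_parallel_51 in the IR */

  kmpc_free_shared(shared, sizeof(ctx_t));
  return 0;
}
```

Packing everything into one aggregate keeps the number of runtime arguments fixed regardless of how many variables are captured, which is also why the `__kmpc_parallel_51` calls in these checks no longer carry the trailing argument count that the removed lines passed.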
@@ -3723,6 +4002,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -3732,9 +4012,11 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3742,104 +4024,116 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], 
i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK3-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_4]] [[TMP18]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP17]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i32 12) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // 
CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3849,52 +4143,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], 
align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -3903,7 +4203,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -3914,12 +4214,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3927,12 +4229,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 
4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3940,104 +4242,114 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 4 +// 
CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP20]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_6]] [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP21]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP20]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP29]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP30]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4050,70 +4362,78 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL3]] // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[J]], align 4 // CHECK3-NEXT: store i32 10, ptr [[K]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// 
CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[MUL6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -4122,7 +4442,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -4133,12 +4453,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -4146,12 +4468,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4164,25 +4486,31 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -4190,118 +4518,122 @@ // CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 // CHECK3-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr 
@[[GLOB2]], i32 [[TMP13]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP19]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 -// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to ptr -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to ptr -// CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to ptr -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 4 -// 
CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_8]] [[TMP34]], ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[TMP33]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP32]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP39]], [[TMP40]] // CHECK3-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = load i64, ptr 
[[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP41]], [[TMP42]] // CHECK3-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP43]], [[TMP44]] // CHECK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] // CHECK3: cond.true18: -// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: br label [[COND_END20:%.*]] // CHECK3: cond.false19: -// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: br label [[COND_END20]] // CHECK3: cond.end20: -// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE18]] ], [ [[TMP41]], [[COND_FALSE19]] ] +// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP45]], [[COND_TRUE18]] ], [ [[TMP46]], [[COND_FALSE19]] ] // CHECK3-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP42]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP47]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 
4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4318,21 +4650,29 @@ // CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -4340,91 +4680,91 @@ // CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // 
CHECK3-NEXT: store i32 0, ptr [[I]], align 4 // CHECK3-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP17]] to i64 // CHECK3-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP22]] to i64 +// 
CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[CONV13]] // CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] // CHECK3-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] // CHECK3-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] // CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK3-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 // CHECK3-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] // CHECK3-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK3-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 // CHECK3-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] // CHECK3-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 // CHECK3-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] // CHECK3-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 // CHECK3-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] // CHECK3-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 // CHECK3-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK3-NEXT: 
[[ADD36:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP32]] // CHECK3-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX37]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -4436,7 +4776,7 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -4448,13 +4788,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 
// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -4462,13 +4806,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4478,132 +4821,141 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_10]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, 
i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20) +// CHECK3-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP30]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_10]] [[TMP32]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP34]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[TMP31]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP30]], i32 20) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) +// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP47]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4616,71 +4968,79 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX5]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX5]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // 
CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -35,8 +35,7 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 @@ -48,15 +47,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void 
@__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -64,13 +65,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -80,138 +81,146 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] 
= load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) +// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -224,104 +233,114 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP17]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP11]] to i32 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP10]], [[CONV6]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP20]], [[CONV6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP22]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp 
sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR7]] // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CALL]], [[CALL11]] -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC]]) #[[ATTR7]] // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD12]], [[CALL13]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -333,8 +352,7 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -346,15 +364,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -362,13 +382,13 @@ // // // CHECK2-LABEL: 
define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -378,136 +398,144 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr 
[[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20) +// CHECK2-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP31]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP33]], ptr [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP32]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP31]], i32 20) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] // CHECK2-NEXT: br i1 [[CMP10]], label 
[[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE11]] ], [ [[TMP45]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) +// CHECK2-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP48]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -520,100 +548,110 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP17]]) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[CALL8:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[CALL8:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR7]] // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC]]) #[[ATTR7]] // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[TMP8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK2-NEXT: 
[[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -65,8 +65,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -78,15 +77,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -94,13 +95,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -110,141 +111,149 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK1-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// 
CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40), !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP41:%.*]] 
= load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
+// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP53]], 0 // CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 -// CHECK1-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 +// CHECK1-NEXT: br i1 [[TMP55]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP51]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP56]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -254,15 +263,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -275,121 +284,131 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label 
[[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] // CHECK1-NEXT: store i32 [[ADD16]], ptr [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -402,7 +421,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -413,12 +432,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -426,12 +447,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -441,121 +462,131 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP29]]), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP31:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP30]]), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP29]], i64 32), !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], 
[[TMP35]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK1-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -568,14 +599,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -588,56 +619,64 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 // CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 // CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] @@ -645,23 +684,23 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 // CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -677,6 +716,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -686,9 +726,11 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -696,98 +738,110 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store [[STRUCT_ANON_5]] [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP19]]), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 24), !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP25]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -796,13 +850,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -812,59 +866,65 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -877,7 +937,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = 
alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -888,12 +948,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -901,12 +963,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -914,97 +976,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP8:%.*]] = 
zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP22:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), 
!llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP22]]), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store [[STRUCT_ANON_8]] [[TMP24]], ptr [[TMP23]], align 1, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP23]]), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP22]], i64 32), !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP31]], 99 // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK1: cond.true6: // CHECK1-NEXT: br label [[COND_END8:%.*]] // CHECK1: cond.false7: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[COND_END8]] // CHECK1: 
cond.end8: -// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: store i32 10, ptr [[J]], align 4 @@ -1014,14 +1086,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1034,78 +1106,86 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// 
CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: store i32 10, ptr [[J]], align 4 @@ -1120,8 +1200,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -1133,15 +1212,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// 
CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1149,13 +1230,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1165,139 +1246,147 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK2-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP31:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP31]]), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB1]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP32]]), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP31]], i32 20), !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: 
cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE11]] ], [ [[TMP45]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// CHECK2-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP48]]) +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK2-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD17]], ptr [[I4]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK2-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK2-NEXT: br i1 [[TMP53]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP49]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP54]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1307,15 +1396,15 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1328,116 +1417,126 @@ // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // 
CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP29]] // CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: 
[[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK2-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK2-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP33:%.*]] = 
load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP43]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1450,7 +1549,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -1461,12 +1560,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1474,12 +1575,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1489,119 +1590,129 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: 
[[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP27:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP28:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP27]]), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP29:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP28]]), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP27]], i32 16), 
!llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ] +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], 
[[COND_FALSE11]] ] // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]]) -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK2-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK2-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -1614,14 +1725,14 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1634,52 +1745,60 @@ // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// 
CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i32 0, i32 [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 // CHECK2-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] @@ -1687,23 +1806,23 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP22]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP29]], 0 // CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -1719,6 +1838,7 @@ // CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -1728,9 +1848,11 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1738,96 +1860,108 @@ // // // CHECK2-LABEL: 
define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// 
CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP25]] 
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store [[STRUCT_ANON_4]] [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP17]]), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i32 12), !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // 
CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK2-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1836,13 +1970,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1852,55 +1986,61 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1913,7 +2053,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -1924,12 +2064,14 @@ // CHECK2-NEXT: br i1 
[[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1937,12 +2079,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1950,95 +2092,105 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// 
CHECK2-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP20:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP20]]), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store [[STRUCT_ANON_6]] [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP31]] 
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP21]]), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP20]], i32 16), !llvm.access.group [[ACC_GRP31]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP29]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: -// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ] +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP30]], [[COND_FALSE7]] ] // CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// 
CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK2-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: store i32 10, ptr [[J]], align 4 @@ -2048,14 +2200,14 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2068,73 +2220,81 @@ // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // 
CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] // CHECK2-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK2-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[MUL6]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: store i32 10, ptr [[J]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -73,8 +73,7 @@ // CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -86,15 +85,17 @@ // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK45-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK45-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-64-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK45-64-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: @@ -102,13 +103,13 @@ // // // CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK45-64-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -121,112 +122,118 @@ // CHECK45-64-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK45-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK45-64-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK45-64-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-64-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK45-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK45-64-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK45-64: omp.precond.then: // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK45-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-64: omp.dispatch.cond: -// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK45-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-64: cond.true: -// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK45-64-NEXT: br label [[COND_END:%.*]] // CHECK45-64: cond.false: -// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[COND_END]] // CHECK45-64: cond.end: -// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-64: omp.dispatch.body: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-64: omp.inner.for.cond: -// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK45-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-64: omp.inner.for.body: -// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: store i32 [[TMP24]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-64: omp.body.continue: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-64: omp.inner.for.inc: -// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK45-64: omp.inner.for.end: // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-64: omp.dispatch.inc: -// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-64: omp.dispatch.end: -// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK45-64-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK45-64-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-64: .omp.final.then: -// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-64-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK45-64-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK45-64-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 // CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK45-64: .omp.final.done: -// CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK45-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK45-64-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// 
CHECK45-64-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK45-64: .omp.lastprivate.then: -// CHECK45-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK45-64-NEXT: [[TMP37:%.*]] = load i32, ptr [[L]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP37]], ptr [[L]], align 4 // CHECK45-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK45-64: .omp.lastprivate.done: // CHECK45-64-NEXT: br label [[OMP_PRECOND_END]] @@ -240,7 +247,7 @@ // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -251,12 +258,14 @@ // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK45-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-64-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-64-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: @@ -264,12 +273,12 @@ // // // CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -282,68 +291,72 @@ // CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 
4 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK45-64: omp.precond.then: // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-64: 
omp.dispatch.cond: -// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK45-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-64: cond.true: -// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK45-64-NEXT: br label [[COND_END:%.*]] // CHECK45-64: cond.false: -// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[COND_END]] // CHECK45-64: cond.end: -// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK45-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-64: omp.dispatch.body: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-64: omp.inner.for.cond: -// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-64: omp.inner.for.body: -// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group 
[[ACC_GRP16]] -// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK45-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] -// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 // CHECK45-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] @@ -351,32 +364,32 @@ // CHECK45-64: omp.body.continue: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-64: omp.inner.for.inc: -// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK45-64: omp.inner.for.end: // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-64: omp.dispatch.inc: -// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-64: omp.dispatch.end: -// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[TMP28]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) +// CHECK45-64-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK45-64-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-64: .omp.final.then: -// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-64-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK45-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK45-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -392,6 +405,7 @@ // CHECK45-64-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -401,9 +415,11 @@ // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-64-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-64-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: @@ -411,11 +427,11 @@ // // // CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -425,78 +441,80 @@ // CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK45-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-64: omp.dispatch.cond: -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-64: cond.true: // CHECK45-64-NEXT: br label [[COND_END:%.*]] // CHECK45-64: cond.false: -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[COND_END]] // CHECK45-64: cond.end: -// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK45-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-64: omp.dispatch.body: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-64: omp.inner.for.cond: -// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// 
CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-64: omp.inner.for.body: -// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-64: omp.body.continue: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-64: omp.inner.for.inc: -// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK45-64: omp.inner.for.end: // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-64: omp.dispatch.inc: -// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-64: omp.dispatch.end: -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK45-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK45-64-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-64: .omp.final.then: // CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -509,7 +527,7 @@ // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK45-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -520,12 +538,14 @@ // CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-64: user_code.entry: // CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK45-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-64-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK45-64-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-64-NEXT: ret void // CHECK45-64: worker.exit: @@ -533,12 +553,12 @@ // // // CHECK45-64-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] { +// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-64-NEXT: entry: // CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = 
alloca ptr, align 8 // CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK45-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK45-64-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -551,97 +571,101 @@ // CHECK45-64-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK45-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK45-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK45-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-64: omp.dispatch.cond: -// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-64: cond.true: // CHECK45-64-NEXT: br label [[COND_END:%.*]] // CHECK45-64: cond.false: -// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[COND_END]] // CHECK45-64: cond.end: -// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // 
CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-64: omp.dispatch.body: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-64: omp.inner.for.cond: -// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK45-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-64: omp.inner.for.body: -// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 10 // CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK45-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[MUL5]] // CHECK45-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] -// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] // CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] +// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[MUL8]] +// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP20]] +// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP22]] to i64 // CHECK45-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-64: omp.body.continue: // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-64: omp.inner.for.inc: -// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK45-64: omp.inner.for.end: // CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-64: omp.dispatch.inc: -// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-64-NEXT: [[TMP27:%.*]] 
= load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4 // CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-64: omp.dispatch.end: -// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK45-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK45-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-64: .omp.final.then: // CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK45-64-NEXT: store i32 10, ptr [[J]], align 4 @@ -656,8 +680,7 @@ // CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -669,15 +692,17 @@ // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK45-32-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: 
ret void // CHECK45-32: worker.exit: @@ -685,13 +710,13 @@ // // // CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -704,111 +729,117 @@ // CHECK45-32-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK45-32-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK45-32-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK45-32-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK45-32: omp.precond.then: // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK45-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32: omp.dispatch.cond: -// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK45-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32: cond.true: -// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK45-32-NEXT: br label [[COND_END:%.*]] // CHECK45-32: cond.false: -// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[COND_END]] // CHECK45-32: cond.end: -// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK45-32-NEXT: br i1 [[CMP6]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32: omp.dispatch.body: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32: omp.inner.for.cond: -// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK45-32-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32: omp.inner.for.body: -// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP4]], i32 0, i32 [[TMP23]] // CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: store i32 [[TMP24]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-32: omp.body.continue: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32: omp.inner.for.inc: -// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK45-32: omp.inner.for.end: // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32: omp.dispatch.inc: -// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 
[[TMP26]], [[TMP27]] // CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-32: omp.dispatch.end: -// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK45-32-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK45-32-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32: .omp.final.then: -// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK45-32-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK45-32-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 // CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK45-32: .omp.final.done: -// CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK45-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK45-32-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK45-32-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK45-32: .omp.lastprivate.then: -// CHECK45-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP37:%.*]] = load i32, ptr [[L]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP37]], ptr [[L]], align 4 // CHECK45-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK45-32: .omp.lastprivate.done: // CHECK45-32-NEXT: br label [[OMP_PRECOND_END]] @@ -822,7 +853,7 @@ // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 4 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -833,12 +864,14 @@ // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: @@ -846,12 +879,12 @@ // // // CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -864,67 +897,71 @@ // CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK45-32: omp.precond.then: // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32: omp.dispatch.cond: -// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK45-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32: cond.true: -// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK45-32-NEXT: br label [[COND_END:%.*]] // CHECK45-32: cond.false: -// 
CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[COND_END]] // CHECK45-32: cond.end: -// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK45-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32: omp.dispatch.body: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32: omp.inner.for.cond: -// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32: omp.inner.for.body: -// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] -// CHECK45-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK45-32-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 // CHECK45-32-NEXT: store 
i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] @@ -932,32 +969,32 @@ // CHECK45-32: omp.body.continue: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32: omp.inner.for.inc: -// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK45-32: omp.inner.for.end: // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32: omp.dispatch.inc: -// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-32: omp.dispatch.end: -// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) +// CHECK45-32-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK45-32-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32: .omp.final.then: -// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK45-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK45-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK45-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -973,6 +1010,7 @@ // CHECK45-32-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] 
{ // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -982,9 +1020,11 @@ // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: @@ -992,11 +1032,11 @@ // // // CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1006,77 +1046,79 @@ // CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK45-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32: omp.dispatch.cond: -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32: cond.true: // CHECK45-32-NEXT: br label [[COND_END:%.*]] // CHECK45-32: cond.false: -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[COND_END]] // CHECK45-32: cond.end: -// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK45-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32: omp.dispatch.body: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32: omp.inner.for.cond: -// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32: omp.inner.for.body: -// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-32: omp.body.continue: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32: omp.inner.for.inc: -// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK45-32: omp.inner.for.end: // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32: omp.dispatch.inc: -// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-32: omp.dispatch.end: -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK45-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK45-32-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32: .omp.final.then: // CHECK45-32-NEXT: store i32 10, ptr [[I]], 
align 4 // CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1089,7 +1131,7 @@ // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -1100,12 +1142,14 @@ // CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32: user_code.entry: // CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK45-32-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-NEXT: ret void // CHECK45-32: worker.exit: @@ -1113,12 +1157,12 @@ // // // CHECK45-32-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-NEXT: entry: // CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1131,95 +1175,99 @@ // CHECK45-32-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK45-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK45-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK45-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32: omp.dispatch.cond: -// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32: cond.true: // CHECK45-32-NEXT: br label [[COND_END:%.*]] // CHECK45-32: cond.false: -// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[COND_END]] // CHECK45-32: cond.end: -// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32: omp.dispatch.body: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32: omp.inner.for.cond: -// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK45-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32: omp.inner.for.body: -// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 10 // CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK45-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[MUL5]] // CHECK45-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] -// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] // CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] // CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] +// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[MUL8]] +// CHECK45-32-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP20]] +// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP21]] +// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP22]] // CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-32: omp.body.continue: // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32: omp.inner.for.inc: -// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK45-32: omp.inner.for.end: // CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32: omp.dispatch.inc: -// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-32: omp.dispatch.end: -// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK45-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK45-32-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32: .omp.final.then: // CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK45-32-NEXT: store i32 10, ptr [[J]], align 4 @@ -1234,8 +1282,7 @@ // CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -1247,15 +1294,17 @@ // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-EX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK45-32-EX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: @@ -1263,13 +1312,13 @@ // // // CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca 
i32, align 4 // CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1282,111 +1331,117 @@ // CHECK45-32-EX-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK45-32-EX-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK45-32-EX-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK45-32-EX-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK45-32-EX: omp.precond.then: // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], 
i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32-EX: omp.dispatch.cond: -// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32-EX: cond.true: -// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK45-32-EX: cond.false: -// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[COND_END]] // CHECK45-32-EX: cond.end: -// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32-EX: omp.dispatch.body: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32-EX: omp.inner.for.cond: -// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK45-32-EX-NEXT: br i1 [[CMP7]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32-EX: omp.inner.for.body: -// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP4]], i32 0, i32 [[TMP23]] // CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: store i32 [[TMP24]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-32-EX: omp.body.continue: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32-EX: omp.inner.for.inc: -// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK45-32-EX: omp.inner.for.end: // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32-EX: omp.dispatch.inc: -// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-32-EX: 
omp.dispatch.end: -// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK45-32-EX-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32-EX: .omp.final.then: -// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-EX-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK45-32-EX-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK45-32-EX-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 // CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK45-32-EX: .omp.final.done: -// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK45-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK45-32-EX-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK45-32-EX: .omp.lastprivate.then: -// CHECK45-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP37:%.*]] = load i32, ptr [[L]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP37]], ptr [[L]], align 4 // CHECK45-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK45-32-EX: .omp.lastprivate.done: // CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]] @@ -1400,7 +1455,7 @@ // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -1411,12 +1466,14 @@ // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-EX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-EX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: @@ -1424,12 +1481,12 @@ // // // CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1442,67 +1499,71 @@ // CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK45-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 
4 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK45-32-EX: omp.precond.then: // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32-EX: omp.dispatch.cond: -// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32-EX: cond.true: -// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK45-32-EX: cond.false: -// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[COND_END]] // CHECK45-32-EX: cond.end: -// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32-EX: omp.dispatch.body: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32-EX: omp.inner.for.cond: -// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32-EX: omp.inner.for.body: -// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] -// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 // CHECK45-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 // CHECK45-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] @@ -1510,32 +1571,32 @@ // CHECK45-32-EX: 
omp.body.continue: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32-EX: omp.inner.for.inc: -// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK45-32-EX: omp.inner.for.end: // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32-EX: omp.dispatch.inc: -// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-32-EX: omp.dispatch.end: -// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) +// CHECK45-32-EX-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32-EX: .omp.final.then: -// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK45-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK45-32-EX-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK45-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK45-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK45-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK45-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -1551,6 +1612,7 @@ // CHECK45-32-EX-SAME: (ptr nonnull align 4 
dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -1560,9 +1622,11 @@ // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-EX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK45-32-EX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: @@ -1570,11 +1634,11 @@ // // // CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1584,77 +1648,79 @@ // CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK45-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32-EX: omp.dispatch.cond: -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32-EX: cond.true: // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK45-32-EX: cond.false: -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[COND_END]] // CHECK45-32-EX: cond.end: -// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK45-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32-EX: omp.dispatch.body: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32-EX: omp.inner.for.cond: -// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32-EX: omp.inner.for.body: -// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-EX-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP10]], 1 +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-32-EX: omp.body.continue: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32-EX: omp.inner.for.inc: -// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK45-32-EX: omp.inner.for.end: // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32-EX: omp.dispatch.inc: -// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK45-32-EX: omp.dispatch.end: -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK45-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32-EX: .omp.final.then: // CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1667,7 +1733,7 @@ // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK45-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -1678,12 +1744,14 @@ // CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK45-32-EX: user_code.entry: // CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK45-32-EX-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK45-32-EX-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK45-32-EX-NEXT: ret void // CHECK45-32-EX: worker.exit: @@ -1691,12 +1759,12 @@ // // // CHECK45-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK45-32-EX-NEXT: entry: // CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK45-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK45-32-EX-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 
4 @@ -1709,95 +1777,99 @@ // CHECK45-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK45-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK45-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK45-32-EX: omp.dispatch.cond: -// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK45-32-EX: cond.true: // CHECK45-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK45-32-EX: cond.false: -// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[COND_END]] // CHECK45-32-EX: cond.end: -// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK45-32-EX: omp.dispatch.body: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK45-32-EX: omp.inner.for.cond: -// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK45-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK45-32-EX: omp.inner.for.body: -// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 10 // CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK45-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[MUL5]] // CHECK45-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] -// CHECK45-32-EX-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] // CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] // CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] +// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[MUL8]] +// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP20]] +// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP21]] +// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP22]] // CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK45-32-EX: omp.body.continue: // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK45-32-EX: omp.inner.for.inc: -// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK45-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK45-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK45-32-EX: omp.inner.for.end: // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK45-32-EX: omp.dispatch.inc: -// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 // CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // 
CHECK45-32-EX: omp.dispatch.end: -// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK45-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK45-32-EX-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK45-32-EX: .omp.final.then: // CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK45-32-EX-NEXT: store i32 10, ptr [[J]], align 4 @@ -1812,8 +1884,7 @@ // CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1825,15 +1896,17 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK-64-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -1841,13 +1914,13 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 
dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1860,112 +1933,118 @@ // CHECK-64-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK-64-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-64-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-64-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK-64-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK-64: omp.precond.then: // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK-64-NEXT: 
store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sle i32 
[[TMP14]], [[TMP15]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK-64-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: store i32 [[TMP24]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP29:%.*]] 
= load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK-64-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-64-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK-64-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK-64-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK-64-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-64: .omp.final.done: -// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-64-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK-64-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-64: .omp.lastprivate.then: -// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP37:%.*]] = load i32, ptr [[L]], align 4 +// CHECK-64-NEXT: store i32 [[TMP37]], ptr [[L]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-64: .omp.lastprivate.done: // CHECK-64-NEXT: br label [[OMP_PRECOND_END]] @@ -1979,7 +2058,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1990,12 +2069,14 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], 
align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2003,12 +2084,12 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2021,68 +2102,72 @@ // CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-64-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK-64: omp.precond.then: // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] -// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 // CHECK-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] @@ -2090,32 +2175,32 @@ // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK-64-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) +// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK-64-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: -// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-64-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -2131,6 +2216,7 @@ // CHECK-64-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -2140,9 +2226,11 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr 
[[TMP3]], align 8 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2150,11 +2238,11 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2164,78 +2252,80 @@ // CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 
// CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK-64-NEXT: store i32 [[ADD3]], ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK-64-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2248,7 +2338,7 @@ // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -2259,12 +2349,14 @@ // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: // CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], 
align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-64-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK-64-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -2272,12 +2364,12 @@ // // // CHECK-64-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] { +// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-64-NEXT: entry: // CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-64-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2290,97 +2382,101 @@ // CHECK-64-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-64-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-64: omp.dispatch.cond: -// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-64: cond.true: // CHECK-64-NEXT: br label [[COND_END:%.*]] // CHECK-64: cond.false: -// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[COND_END]] // CHECK-64: cond.end: -// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-64: omp.dispatch.body: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-64: omp.inner.for.cond: -// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-64: omp.inner.for.body: -// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 10 // CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-64-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[MUL5]] // CHECK-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] -// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] // CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] +// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[MUL8]] +// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP20]] +// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP22]] to i64 // CHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-64: omp.body.continue: // CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-64: omp.inner.for.inc: -// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK-64: omp.inner.for.end: // CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-64: omp.dispatch.inc: -// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4 -// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4 // CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-64: omp.dispatch.end: -// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-64-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-64-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-64: .omp.final.then: // CHECK-64-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-64-NEXT: store i32 10, ptr [[J]], align 4 @@ -2395,8 +2491,7 @@ // CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2408,15 +2503,17 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK-32-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -2424,13 +2521,13 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2443,111 +2540,117 @@ // CHECK-32-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK-32-NEXT: 
[[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-32-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-32-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK-32: omp.precond.then: // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] 
], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK-32-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP4]], i32 0, i32 [[TMP23]] // CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: store i32 [[TMP24]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK-32-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK-32-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK-32-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK-32-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK-32-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32: .omp.final.done: -// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK-32-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], 
label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32: .omp.lastprivate.then: -// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP37:%.*]] = load i32, ptr [[L]], align 4 +// CHECK-32-NEXT: store i32 [[TMP37]], ptr [[L]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32: .omp.lastprivate.done: // CHECK-32-NEXT: br label [[OMP_PRECOND_END]] @@ -2561,7 +2664,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2572,12 +2675,14 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -2585,12 +2690,12 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2603,67 +2708,71 @@ // CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], 
align 4 -// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK-32: omp.precond.then: // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] -// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, 
!llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 // CHECK-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] @@ -2671,32 +2780,32 @@ // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) +// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK-32-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: -// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-NEXT: [[TMP32:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -2712,6 +2821,7 @@ // CHECK-32-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -2721,9 +2831,11 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -2731,11 +2843,11 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2745,77 +2857,79 @@ // CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] 
+// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK-32-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // 
CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2828,7 +2942,7 @@ // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -2839,12 +2953,14 @@ // CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32: user_code.entry: // CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK-32-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -2852,12 +2968,12 @@ // // // CHECK-32-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-NEXT: entry: // CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2870,95 +2986,99 @@ // CHECK-32-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32: omp.dispatch.cond: -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32: cond.true: // CHECK-32-NEXT: br label [[COND_END:%.*]] // CHECK-32: cond.false: -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[COND_END]] // CHECK-32: cond.end: -// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32: omp.dispatch.body: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32: omp.inner.for.cond: -// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32: omp.inner.for.body: -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 10 // CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[MUL5]] // CHECK-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] -// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] // CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] // CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] +// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[MUL8]] +// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP20]] +// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP21]] +// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP22]] // CHECK-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32: omp.body.continue: // CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32: omp.inner.for.inc: -// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK-32: omp.inner.for.end: // CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32: omp.dispatch.inc: -// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32: omp.dispatch.end: -// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-32-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32: .omp.final.then: // CHECK-32-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-NEXT: store i32 10, ptr [[J]], align 4 @@ -2973,8 +3093,7 @@ // CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK-32-EX-NEXT: 
[[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2986,15 +3105,17 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -3002,13 +3123,13 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3021,111 +3142,117 @@ // CHECK-32-EX-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[N]], ptr 
[[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK-32-EX-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-32-EX-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-32-EX-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK-32-EX: omp.precond.then: // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
128) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK-32-EX-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, 
!llvm.access.group [[ACC_GRP12]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP4]], i32 0, i32 [[TMP23]] // CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: store i32 [[TMP24]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr 
[[TMP30]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-EX-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK-32-EX-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK-32-EX-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] // CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK-32-EX: .omp.final.done: -// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK-32-EX: .omp.lastprivate.then: -// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP37:%.*]] = load i32, ptr [[L]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP37]], ptr [[L]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK-32-EX: .omp.lastprivate.done: // CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]] @@ -3139,7 +3266,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -3150,12 +3277,14 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// 
CHECK-32-EX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -3163,12 +3292,12 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3181,67 +3310,71 @@ // CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK-32-EX-NEXT: br i1 
[[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK-32-EX: omp.precond.then: // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle 
i32 [[TMP16]], [[TMP17]] // CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]] -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1 // CHECK-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16 // CHECK-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP16]] @@ -3249,32 +3382,32 @@ // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) +// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: -// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -3290,6 +3423,7 @@ // CHECK-32-EX-SAME: (ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -3299,9 +3433,11 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call 
void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -3309,11 +3445,11 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3323,77 +3459,79 @@ // CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3406,7 +3544,7 @@ // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -3417,12 +3555,14 @@ // CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-32-EX: user_code.entry: // CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// 
CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK-32-EX-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK-32-EX-NEXT: call void @__omp_outlined__3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK-32-EX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-32-EX-NEXT: ret void // CHECK-32-EX: worker.exit: @@ -3430,12 +3570,12 @@ // // // CHECK-32-EX-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] { +// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-32-EX-NEXT: entry: // CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-EX-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-32-EX-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3448,95 +3588,99 @@ // CHECK-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK-32-EX-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK-32-EX: omp.dispatch.cond: -// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK-32-EX: cond.true: // CHECK-32-EX-NEXT: br label [[COND_END:%.*]] // CHECK-32-EX: cond.false: -// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[COND_END]] // CHECK-32-EX: cond.end: -// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK-32-EX: omp.dispatch.body: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK-32-EX: omp.inner.for.cond: -// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK-32-EX: omp.inner.for.body: -// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 10 // CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10 +// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[MUL5]] // CHECK-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]] -// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]] // CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]] // CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]] +// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] +// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[MUL8]] +// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP20]] +// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP21]] +// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP22]] // CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK-32-EX: omp.body.continue: // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK-32-EX: 
omp.inner.for.inc: -// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK-32-EX: omp.inner.for.end: // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK-32-EX: omp.dispatch.inc: -// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 -// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 // CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK-32-EX: omp.dispatch.end: -// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-32-EX-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-32-EX-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK-32-EX: .omp.final.then: // CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4 // CHECK-32-EX-NEXT: store i32 10, ptr [[J]], align 4 diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -80,6 +80,7 @@ // CHECK1-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 @@ -91,9 +92,11 @@ // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void @@ -102,16 +105,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -119,6 +124,7 @@ // CHECK1-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 @@ -130,9 +136,11 @@ // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void @@ -141,16 +149,18 @@ // // // 
CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 // CHECK1-NEXT: ret void // // @@ -158,6 +168,7 @@ // CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -169,9 +180,11 @@ // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void @@ -180,16 +193,18 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -197,6 +212,7 @@ // CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -208,9 +224,11 @@ // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void @@ -219,16 +237,18 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store ptr null, ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void 
// // @@ -238,6 +258,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -251,9 +272,11 @@ // CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void @@ -262,16 +285,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -281,6 +306,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -294,9 +320,11 @@ // CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) // CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void @@ -305,16 +333,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 8 // CHECK3-NEXT: ret void // // @@ -324,6 +354,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -337,9 +368,11 @@ // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) // 
CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK4-NEXT: ret void @@ -348,16 +381,18 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK4-NEXT: ret void // // @@ -367,6 +402,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -380,9 +416,11 @@ // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK4-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK4-NEXT: ret void @@ -391,15 +429,17 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: 
[[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store ptr null, ptr [[TMP2]], align 4 // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -54,6 +54,7 @@ // CHECK1-SAME: (i64 noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8 @@ -65,9 +66,11 @@ // CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[E1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK1-NEXT: ret void @@ -76,38 +79,40 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK1-NEXT: 
[[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) -// CHECK1-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK1-NEXT: store double [[ADD]], ptr [[E1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[E1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[E:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[E]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK1-NEXT: store double [[ADD]], ptr [[E]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[E]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]] -// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[E]], align 8 +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[TMP10]], [[TMP11]] +// 
CHECK1-NEXT: store double [[ADD1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E]], i64 8) // CHECK1-NEXT: ret void // // @@ -324,6 +329,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 @@ -339,9 +345,13 @@ // CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[C1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[D2]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4) // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) @@ -351,58 +361,59 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) -// CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: store i8 0, ptr [[C1]], align 1 -// CHECK1-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = 
load i8, ptr [[C1]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: store i8 0, ptr [[C]], align 1 +// CHECK1-NEXT: store float 1.000000e+00, ptr [[D]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[C]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK1-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK1-NEXT: store float [[MUL]], ptr [[D2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[C1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[D2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK1-NEXT: store i8 [[CONV1]], ptr [[C]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[D]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK1-NEXT: store float [[MUL]], ptr [[D]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[D]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP11]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK1-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[C]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK1-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[D]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = fmul float [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store float [[MUL6]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4) -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C]], i64 1) // CHECK1-NEXT: ret void // // @@ -578,13 +589,13 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 // CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: 
[[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 // CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 @@ -604,11 +615,11 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 @@ -630,13 +641,13 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 // CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 // CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 @@ -656,11 +667,11 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, 
i32 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 @@ -673,6 +684,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -682,9 +694,13 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -692,130 +708,138 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i64 16) +// CHECK1-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[OR]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[B]], align 2 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i16 [[COND]], ptr [[TMP4]], align 2 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP11]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[OR]], ptr [[A1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[OR]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 
0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP9]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32 -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK1: cond.true9: -// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: br label [[COND_END11:%.*]] -// CHECK1: cond.false10: -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: br label [[COND_END11]] -// CHECK1: cond.end11: -// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ] -// CHECK1-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[OR3:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK1: cond.true7: +// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: br label 
[[COND_END9:%.*]] +// CHECK1: cond.false8: +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: br label [[COND_END9]] +// CHECK1: cond.end9: +// CHECK1-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE7]] ], [ [[TMP19]], [[COND_FALSE8]] ] +// CHECK1-NEXT: store i16 [[COND10]], ptr [[TMP4]], align 2 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: ret void @@ -1151,13 +1175,13 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 // CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 @@ -1177,11 +1201,11 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 @@ -1203,13 +1227,13 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 // CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 // CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 @@ -1229,11 +1253,11 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 @@ -1246,6 +1270,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 @@ -1257,9 +1282,11 @@ // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1267,38 +1294,40 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK2-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) -// CHECK2-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK2-NEXT: store double [[ADD]], ptr [[E1]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -// CHECK2-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[E:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[E]], align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK2-NEXT: store double [[ADD]], ptr [[E]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[E]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr 
@_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +// CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8 -// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]] -// CHECK2-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, ptr [[E]], align 8 +// CHECK2-NEXT: [[ADD1:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK2-NEXT: store double [[ADD1]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i32 8) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[E]], i32 8) // CHECK2-NEXT: ret void // // @@ -1515,6 +1544,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 @@ -1530,9 +1560,13 @@ // CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) @@ -1542,58 +1576,59 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 
4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK2-NEXT: store i8 0, ptr [[C1]], align 1 -// CHECK2-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: store i8 0, ptr [[C]], align 1 +// CHECK2-NEXT: store float 1.000000e+00, ptr [[D]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, ptr [[C]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK2-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK2-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK2-NEXT: store float [[MUL]], ptr [[D2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK2-NEXT: store i8 [[CONV1]], ptr [[C]], align 1 
+// CHECK2-NEXT: [[TMP6:%.*]] = load float, ptr [[D]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK2-NEXT: store float [[MUL]], ptr [[D]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[C]], ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[D]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP11]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i8, ptr [[C]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK2-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK2-NEXT: store i8 [[CONV5]], ptr [[TMP2]], align 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load float, ptr [[D]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = fmul float [[TMP16]], [[TMP17]] +// CHECK2-NEXT: store float [[MUL6]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D]], i32 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 1) // CHECK2-NEXT: ret void // // @@ -1769,13 +1804,13 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: 
[[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 // CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 // CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 @@ -1795,11 +1830,11 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 @@ -1821,13 +1856,13 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 // CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 
128 // CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 @@ -1847,11 +1882,11 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 @@ -1864,6 +1899,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -1873,9 +1909,13 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1883,130 +1923,138 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca 
ptr, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) -// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK2-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr 
[[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP9]], ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i32 8) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[OR]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK2-NEXT: [[TMP20:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[B]], align 2 // CHECK2-NEXT: 
br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i16 [[COND]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP11]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK2-NEXT: store i32 [[OR]], ptr [[A1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK2-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[OR]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: 
cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) -// CHECK2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK2-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[B]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP9]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32 -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK2: cond.true9: -// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: br label [[COND_END11:%.*]] -// CHECK2: cond.false10: -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: br label [[COND_END11]] -// CHECK2: cond.end11: -// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], 
[[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ] -// CHECK2-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK2-NEXT: [[OR3:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK2: cond.true7: +// CHECK2-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: br label [[COND_END9:%.*]] +// CHECK2: cond.false8: +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: br label [[COND_END9]] +// CHECK2: cond.end9: +// CHECK2-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE7]] ], [ [[TMP19]], [[COND_FALSE8]] ] +// CHECK2-NEXT: store i16 [[COND10]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: ret void @@ -2342,13 +2390,13 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 // CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 @@ -2368,11 +2416,11 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 
-// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 @@ -2394,13 +2442,13 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 // CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 // CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 @@ -2420,11 +2468,11 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] // CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 @@ -2437,6 +2485,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 @@ -2448,9 +2497,11 @@ // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: 
[[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -2458,38 +2509,40 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) -// CHECK3-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK3-NEXT: store double [[ADD]], ptr [[E1]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP5]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[E:%.*]] = call 
align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[E]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK3-NEXT: store double [[ADD]], ptr [[E]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[E]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP7]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[E1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[E]], align 8 +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store double [[ADD1]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i32 8) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[E]], i32 8) // CHECK3-NEXT: ret void // // @@ -2706,6 +2759,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 @@ -2721,9 +2775,13 @@ // CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK3-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] +// CHECK3-NEXT: call void 
@__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) // CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) @@ -2733,58 +2791,59 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: store i8 0, ptr [[C1]], align 1 -// CHECK3-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: store i8 0, ptr [[C]], align 1 +// CHECK3-NEXT: store float 1.000000e+00, ptr [[D]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[C]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK3-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK3-NEXT: store float [[MUL]], ptr [[D2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 
x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK3-NEXT: store i8 [[CONV1]], ptr [[C]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr [[D]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK3-NEXT: store float [[MUL]], ptr [[D]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[D]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP11]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP11]] to i32 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[D2]], align 4 -// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[C]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK3-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK3-NEXT: store i8 [[CONV5]], ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[TMP16:%.*]] = 
load float, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[D]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store float [[MUL6]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D]], i32 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 1) // CHECK3-NEXT: ret void // // @@ -2960,13 +3019,13 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 // CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 // CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 @@ -2986,11 +3045,11 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 @@ -3012,13 +3071,13 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 // CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 // CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 @@ -3038,11 +3097,11 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 @@ -3055,6 +3114,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -3064,9 +3124,13 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] +// CHECK3-NEXT: 
call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3074,130 +3138,138 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP8]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr 
@_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_3]] [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i32 8) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP14]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], 
[[CONV3]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[OR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP20:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[B]], align 2 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i16 [[COND]], ptr [[TMP4]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP11]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[OR]], ptr [[A1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[OR]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP9]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: 
.omp.reduction.then: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP13]] to i32 -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK3: cond.true9: -// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: br label [[COND_END11:%.*]] -// CHECK3: cond.false10: -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: br label [[COND_END11]] -// CHECK3: cond.end11: -// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP15]], [[COND_TRUE9]] ], [ [[TMP16]], [[COND_FALSE10]] ] -// CHECK3-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[OR3:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV5:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK3: cond.true7: +// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: br label [[COND_END9:%.*]] +// CHECK3: cond.false8: +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: br label [[COND_END9]] +// CHECK3: cond.end9: +// CHECK3-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE7]] ], [ [[TMP19]], [[COND_FALSE8]] ] +// CHECK3-NEXT: store i16 [[COND10]], ptr [[TMP4]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: ret void @@ -3533,13 +3605,13 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 
// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 // CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 @@ -3559,11 +3631,11 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 @@ -3585,13 +3657,13 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 // CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] // CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 // CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 @@ -3611,11 +3683,11 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, 
i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]] // CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 diff --git a/clang/test/OpenMP/openmp_win_codegen.cpp b/clang/test/OpenMP/openmp_win_codegen.cpp --- a/clang/test/OpenMP/openmp_win_codegen.cpp +++ b/clang/test/OpenMP/openmp_win_codegen.cpp @@ -49,34 +49,38 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @"?main@Test@@SAXXZ"() -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: invoke void @"?foo@@YAXXZ"() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[CATCH_DISPATCH:%.*]] // CHECK1: catch.dispatch: -// CHECK1-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind label [[TERMINATE:%.*]] +// CHECK1-NEXT: [[TMP1:%.*]] = catchswitch within none [label %catch] unwind label [[TERMINATE:%.*]] // CHECK1: catch: -// CHECK1-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr @"??_R0H@8", i32 0, ptr %t] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP1]]) ] -// CHECK1-NEXT: invoke void @"?bar@@YAXXZ"() [ "funclet"(token [[TMP1]]) ] +// CHECK1-NEXT: [[TMP2:%.*]] = catchpad within [[TMP1]] [ptr @"??_R0H@8", i32 0, ptr %t] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP2]]) ] +// CHECK1-NEXT: invoke void @"?bar@@YAXXZ"() [ "funclet"(token [[TMP2]]) ] // CHECK1-NEXT: to label 
[[INVOKE_CONT1:%.*]] unwind label [[TERMINATE2:%.*]] // CHECK1: invoke.cont1: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP1]]) ] -// CHECK1-NEXT: catchret from [[TMP1]] to label [[CATCHRET_DEST:%.*]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP2]]) ] +// CHECK1-NEXT: catchret from [[TMP2]] to label [[CATCHRET_DEST:%.*]] // CHECK1: catchret.dest: // CHECK1-NEXT: br label [[TRY_CONT:%.*]] // CHECK1: try.cont: @@ -84,47 +88,49 @@ // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[TRY_CONT]] // CHECK1: terminate: -// CHECK1-NEXT: [[TMP4:%.*]] = cleanuppad within none [] -// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7:[0-9]+]] [ "funclet"(token [[TMP4]]) ] +// CHECK1-NEXT: [[TMP5:%.*]] = cleanuppad within none [] +// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7:[0-9]+]] [ "funclet"(token [[TMP5]]) ] // CHECK1-NEXT: unreachable // CHECK1: terminate2: -// CHECK1-NEXT: [[TMP5:%.*]] = cleanuppad within [[TMP1]] [] -// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7]] [ "funclet"(token [[TMP5]]) ] +// CHECK1-NEXT: [[TMP6:%.*]] = cleanuppad within [[TMP2]] [] +// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7]] [ "funclet"(token [[TMP6]]) ] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LOCAL_J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 3, ptr [[LOCAL_J]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 
@__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: store i32 4, ptr [[LOCAL_J]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[LOCAL_J]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP2]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func, i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOCAL_J]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[LOCAL_J]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[LOCAL_J]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/outlined_artificial.c b/clang/test/OpenMP/outlined_artificial.c --- a/clang/test/OpenMP/outlined_artificial.c +++ b/clang/test/OpenMP/outlined_artificial.c @@ -54,14 +54,11 @@ // CHECK-DAG: !DISubprogram(name: "__captured_stmt" // CHECK-DAG-SAME: flags: DIFlagArtificial -// CHECK-DAG: !DISubprogram(name: ".omp_outlined._debug__" +// CHECK-DAG: !DISubprogram(name: ".omp_outlined." // CHECK-DAG-SAME: flags: DIFlagArtificial // CHECK-DAG: !DISubprogram(linkageName: ".omp_task_entry." // CHECK-DAG-SAME: flags: DIFlagArtificial -// CHECK-DAG: !DISubprogram(name: ".omp_outlined." -// CHECK-DAG-SAME: flags: DIFlagArtificial - // CHECK-DAG: !DISubprogram(name: ".omp_outlined..1" // CHECK-DAG-SAME: flags: DIFlagArtificial diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -78,6 +78,9 @@ // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -87,45 +90,56 @@ // CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], ptr [[VLA]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..3, i64 [[TMP1]], ptr [[VLA]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP8]]) // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP10]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -145,105 +159,121 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[GLOBAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16 -// CHECK1-NEXT: store i64 [[TMP0]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..2, i64 [[TMP0]], ptr [[VLA1]], ptr [[GLOBAL]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 16 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[GLOBAL]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP7]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP9:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null 
-// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP10:%.*]] = extractvalue { ptr, i32 } [[TMP9]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP10]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, i64 [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // @@ -251,6 +281,7 @@ // CHECK1-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0 @@ -258,38 +289,43 @@ // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[ARGC_ADDR]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[VAR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 0, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP7]] // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP9]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // @@ -309,6 +345,9 @@ // CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] @@ -322,59 +361,69 @@ // CHECK2-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG23]] // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG32:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB5:[0-9]+]], i32 1, ptr @.omp_outlined..4, i64 [[TMP1]]), !dbg [[DBG33:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 2, ptr @.omp_outlined..8, i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG34:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]] -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]), !dbg [[DBG36:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG32:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8, !dbg [[DBG32]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG32]] +// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP4]], align 8, !dbg [[DBG32]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG32]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0, !dbg [[DBG33:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8, !dbg [[DBG33]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB5:[0-9]+]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]), !dbg [[DBG33]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0, !dbg [[DBG34:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8, !dbg [[DBG34]] +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1, !dbg [[DBG34]] +// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP7]], align 8, !dbg [[DBG34]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]), !dbg [[DBG34]] +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP8]]), !dbg [[DBG36:![0-9]+]] // CHECK2-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4, !dbg [[DBG37:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP4]]), !dbg [[DBG38]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4, !dbg [[DBG38]] -// CHECK2-NEXT: ret i32 [[TMP5]], !dbg [[DBG38]] +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP9]]), !dbg [[DBG38]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4, !dbg [[DBG38]] +// CHECK2-NEXT: ret i32 [[TMP10]], !dbg [[DBG38]] // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG39:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG39:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG53]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG54]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG54:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG54]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG55:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG55]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG56:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG55:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], 
i64 1, !dbg [[DBG56:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG57:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG55]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG57:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG58:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG59:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG57]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG53]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG53]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG53]] -// CHECK2-NEXT: unreachable, !dbg [[DBG53]] +// CHECK2-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG56]] +// CHECK2-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[DBG56]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR7:[0-9]+]], !dbg [[DBG56]] +// CHECK2-NEXT: unreachable, !dbg [[DBG56]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG58:![0-9]+]] { +// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG60:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG65:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG67:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -384,331 +433,204 @@ // CHECK2-NEXT: unreachable // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG66:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG70:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG72:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: call void @.omp_outlined._debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG72]] -// CHECK2-NEXT: ret void, !dbg [[DBG72]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG75:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[GLOBAL:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[DOTBOUND_TID__ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG82:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG82]] -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG82]] -// CHECK2-NEXT: store i64 [[TMP0]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA1]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined..3, i64 [[TMP0]], ptr [[VLA1]], ptr [[GLOBAL]]), !dbg [[DBG82]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP2]]), !dbg [[DBG86]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG81:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG81]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG81]] +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 16, !dbg [[DBG81]] +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG85:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG85]] +// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP5]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG85]] +// CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[TMP6]], align 8, !dbg [[DBG85]] +// 
CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]), !dbg [[DBG86]] // CHECK2-NEXT: ret void, !dbg [[DBG88:![0-9]+]] // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG89:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG89:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] -// CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL_ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG99:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG100]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG100]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG101:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG101]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG100]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 
+// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG101:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG101]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP7]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG102:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG102:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG103:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG102]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG103:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG104:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG105:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG103]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG100]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG100]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR7]], !dbg [[DBG100]] -// CHECK2-NEXT: unreachable, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP9:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG102]] +// CHECK2-NEXT: [[TMP10:%.*]] = extractvalue { ptr, i32 } [[TMP9]], 0, !dbg [[DBG102]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP10]]) #[[ATTR7]], !dbg [[DBG102]] +// CHECK2-NEXT: unreachable, !dbg [[DBG102]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG105:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG106:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.2(ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG112]] -// CHECK2-NEXT: ret void, !dbg [[DBG112]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG117:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG118]] +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG118]] +// CHECK2-NEXT: ret void, !dbg [[DBG119:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG113:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG118:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.1(ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG118]] -// CHECK2-NEXT: ret void, !dbg [[DBG118]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG119:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG120:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata 
ptr [[A_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG126:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG126]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 2, ptr @.omp_outlined..7, i64 [[TMP0]], ptr [[TMP1]]), !dbg [[DBG126]] -// CHECK2-NEXT: ret void, !dbg [[DBG127:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG128:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG135]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG136:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG136]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG135]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG131:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG131]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG132:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[ARRAYIDX]], align 4, !dbg [[DBG132]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG133:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG137:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG138:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG139:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG137]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG134:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG135:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG136:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG134]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG135]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG135]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR7]], !dbg [[DBG135]] -// CHECK2-NEXT: unreachable, !dbg [[DBG135]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG140:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.6(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG146]] -// CHECK2-NEXT: ret void, !dbg [[DBG146]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 
dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG147:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.5(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG153]] -// CHECK2-NEXT: ret void, !dbg [[DBG153]] +// CHECK2-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG133]] +// CHECK2-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[DBG133]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR7]], !dbg [[DBG133]] +// CHECK2-NEXT: unreachable, !dbg [[DBG133]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG154:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG137:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG161]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG162:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB11:[0-9]+]], i32 2, ptr @.omp_outlined..10, ptr [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG163:![0-9]+]] -// CHECK2-NEXT: ret i32 0, !dbg [[DBG164:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG165:![0-9]+]] { +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG144:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG144]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG145:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG146:![0-9]+]] +// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP4]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG146]] +// CHECK2-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB11:[0-9]+]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG146]] +// CHECK2-NEXT: ret i32 0, !dbg [[DBG147:![0-9]+]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG148:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata 
[[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG175:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG175]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG176:![0-9]+]] -// CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG159:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG160:![0-9]+]] +// CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG162:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VAR]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG187:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 [[TMP4]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0, !dbg [[DBG187]] -// CHECK2-NEXT: ret void, !dbg [[DBG188:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VAR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG171:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = mul nsw i64 0, [[TMP4]], !dbg [[DBG171]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP7]], !dbg [[DBG171]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0, !dbg [[DBG171]] +// CHECK2-NEXT: ret void, !dbg [[DBG172:![0-9]+]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG178]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG178]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR7]], !dbg [[DBG178]] -// CHECK2-NEXT: unreachable, !dbg [[DBG178]] +// CHECK2-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG162]] +// CHECK2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 0, !dbg [[DBG162]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP9]]) #[[ATTR7]], !dbg [[DBG162]] +// CHECK2-NEXT: unreachable, !dbg [[DBG162]] // // // CHECK2-LABEL: define 
{{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG189:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG194:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG195:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG173:![0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG201:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.9(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG201]] -// CHECK2-NEXT: ret void, !dbg [[DBG201]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG178:![0-9]+]] // // // CHECK3-LABEL: define {{[^@]+}}@main diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -215,6 +215,8 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call 
void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -251,10 +253,10 @@ // CHECK1-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ4mainE3var) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END5]] // CHECK1: init.end5: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..3) -// CHECK1-NEXT: [[CALL6:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK1-NEXT: store i32 [[CALL6]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: store i32 [[CALL7]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP8]] @@ -374,27 +376,30 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP3]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) 
+// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 @_ZZ4mainE3vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 @_ZZ4mainE3vec, i64 8, i1 false) +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ @_ZZ4mainE5s_arr, [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -402,57 +407,62 @@ // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done1: -// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) 
-// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP12]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP14]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP13]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) 
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP3]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) 
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR2]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]], ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -468,7 +478,7 @@ // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_threadprivate_register(ptr @[[GLOB1]], ptr @_ZZ5tmainIiET_vE5s_arr, ptr @.__kmpc_global_ctor_..4, ptr null, ptr @.__kmpc_global_dtor_..5) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE5s_arr, i32 noundef 1) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_0]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor.6, ptr null, ptr @__dso_handle) #[[ATTR3]] // CHECK1-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ5tmainIiET_vE5s_arr) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END]] @@ -488,8 +498,8 @@ // CHECK1-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ5tmainIiET_vE3var) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END5]] // CHECK1: init.end5: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..9) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_6]])
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR3]]
// CHECK1-NEXT: ret i32 0
//
@@ -574,9 +584,9 @@
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i64 0, i64 0
+// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP1]], i64 0, i64 0
// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1)
-// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYINIT_BEGIN]], i64 1
+// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAYINIT_BEGIN]], i64 1
// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8
// CHECK1-NEXT: ret ptr [[TMP2]]
@@ -601,11 +611,11 @@
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP1]], i64 2
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[TMP1]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]]
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
@@ -620,8 +630,8 @@
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_0:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_1:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]]
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @_ZZ5tmainIiET_vE5s_arr
// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
@@ -651,73 +661,79 @@
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.)
-// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
-// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP3]]
-// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]]
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.)
+// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
+// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP4]]
+// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]]
// CHECK1: copyin.not.master:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 128
-// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.)
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP6]], ptr align 128 @_ZZ5tmainIiET_vE3vec, i64 8, i1 false)
-// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.)
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP7]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128
+// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 128
+// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.)
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP7]], ptr align 128 @_ZZ5tmainIiET_vE3vec, i64 8, i1 false) +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ @_ZZ5tmainIiET_vE5s_arr, [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done1: -// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr @_ZZ5tmainIiET_vE3var.cache.) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr @_ZZ5tmainIiET_vE3var.cache.) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 128 -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.) 
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP12]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr @_ZZ5tmainIiET_vE3var.cache.) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.) -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP14]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP13]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr @_ZZ5tmainIiET_vE3var.cache.) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.) +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) 
-// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP3]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 128 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 128 // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -728,7 +744,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP0]], ptr @g, i64 4, ptr @g.cache.) // CHECK1-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 128 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[F]], align 4 @@ -753,7 +769,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP0]], ptr @g, i64 4, ptr @g.cache.) // CHECK1-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 128 @@ -773,27 +789,30 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK3-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP3]] -// CHECK3-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) +// CHECK3-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP4]] +// CHECK3-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK3: copyin.not.master: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr @g, align 128 -// CHECK3-NEXT: store volatile i32 [[TMP5]], ptr [[TMP2]], align 128 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr @g, align 128 +// CHECK3-NEXT: store volatile i32 [[TMP6]], ptr [[TMP3]], align 128 // CHECK3-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK3: copyin.not.master.end: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK3-NEXT: store volatile i32 1, ptr [[TMP6]], align 128 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) +// CHECK3-NEXT: store volatile i32 1, ptr [[TMP7]], align 128 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -813,35 +832,39 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) 
+// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK4-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP3]] -// CHECK4-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) +// CHECK4-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP4]] +// CHECK4-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK4: copyin.not.master: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr @g, align 128 -// CHECK4-NEXT: store volatile i32 [[TMP5]], ptr [[TMP2]], align 128 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr @g, align 128 +// CHECK4-NEXT: store volatile i32 [[TMP6]], ptr [[TMP3]], align 128 // CHECK4-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK4: copyin.not.master.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP6]], align 128 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr @__block_literal_global.2, i32 0, i32 3), align 8 -// CHECK4-NEXT: call void [[TMP7]](ptr noundef @__block_literal_global.2) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) 
+// CHECK4-NEXT: store volatile i32 1, ptr [[TMP7]], align 128 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr @__block_literal_global.2, i32 0, i32 3), align 8 +// CHECK4-NEXT: call void [[TMP8]](ptr noundef @__block_literal_global.2) // CHECK4-NEXT: ret void // // @@ -861,6 +884,7 @@ // CHECK5-LABEL: define {{[^@]+}}@_Z10array_funcv // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK5-NEXT: [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ10array_funcvE1s acquire, align 8 // CHECK5-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK5-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] @@ -883,7 +907,7 @@ // CHECK5-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ10array_funcvE1s) #[[ATTR1]] // CHECK5-NEXT: br label [[INIT_END]] // CHECK5: init.end: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // @@ -962,24 +986,27 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ10array_funcvE1a, i64 8, ptr @_ZZ10array_funcvE1a.cache.) -// CHECK5-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ10array_funcvE1a to i64), [[TMP3]] -// CHECK5-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ10array_funcvE1a, i64 8, ptr @_ZZ10array_funcvE1a.cache.) 
+// CHECK5-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ10array_funcvE1a to i64), [[TMP4]] +// CHECK5-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK5: copyin.not.master: -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 @_ZZ10array_funcvE1a, i64 8, i1 false) -// CHECK5-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ10array_funcvE1s, i64 16, ptr @_ZZ10array_funcvE1s.cache.) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 @_ZZ10array_funcvE1a, i64 8, i1 false) +// CHECK5-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ10array_funcvE1s, i64 16, ptr @_ZZ10array_funcvE1s.cache.) +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ @_ZZ10array_funcvE1s, [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -987,12 +1014,12 @@ // CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(8) ptr @_ZN2StaSERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done1: // CHECK5-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK5: copyin.not.master.end: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // @@ -1035,6 +1062,8 @@ // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -1059,18 +1088,28 
@@ // CHECK11-NEXT: store i8 1, ptr @_ZGVZ4mainE3var, align 1 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: [[TMP4:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) -// CHECK11-NEXT: [[TMP6:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) -// CHECK11-NEXT: [[TMP7:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP4]], ptr [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP8]]) -// CHECK11-NEXT: [[CALL4:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK11-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP7:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK11-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK11-NEXT: store i32 [[CALL5]], ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP14]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1134,102 +1173,105 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] -// CHECK11-NEXT: br i1 [[TMP7]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: br i1 [[TMP12]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr 
align 4 @_ZZ4mainE3vec) -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP10:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP15]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[TMP12:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP17]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP13:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]]) -// CHECK11-NEXT: [[TMP15:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP17]], i64 0, i64 0 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) -// CHECK11-NEXT: [[TMP19:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) -// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP19]], i64 0, i64 0 -// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP20:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP22]], i64 0, i64 0 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) +// CHECK11-NEXT: [[TMP24:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP24]], i64 0, i64 0 +// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: 
[[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK11-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 4 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP10:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK11-SAME: () #[[ATTR2]] comdat { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]], ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -1239,7 +1281,7 @@ // CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] // CHECK11: init.check: // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 
4 dereferenceable(4) @_ZZ5tmainIiET_vE5s_arr, i32 noundef 1) -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_0]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(ptr @__cxx_global_array_dtor.2, ptr null, ptr @__dso_handle) #[[ATTR4]] // CHECK11-NEXT: store i8 1, ptr @_ZGVZ5tmainIiET_vE5s_arr, align 8 // CHECK11-NEXT: br label [[INIT_END]] @@ -1253,13 +1295,23 @@ // CHECK11-NEXT: store i8 1, ptr @_ZGVZ5tmainIiET_vE3var, align 8 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: [[TMP4:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) -// CHECK11-NEXT: [[TMP6:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) -// CHECK11-NEXT: [[TMP7:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP4]], ptr [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP8]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP7:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_4]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: ret i32 0 // @@ -1356,8 +1408,8 @@ // CHECK11-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_0:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_1:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @_ZZ5tmainIiET_vE5s_arr // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1366,90 +1418,91 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] -// CHECK11-NEXT: br i1 [[TMP7]], label 
[[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: br i1 [[TMP12]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP4]], align 128 -// CHECK11-NEXT: [[TMP9:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP9]], ptr align 128 [[TMP1]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP10:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP10]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP9]], align 128 +// CHECK11-NEXT: [[TMP14:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP14]], ptr align 128 [[TMP4]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP15]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // 
CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[TMP12:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP17]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK11-NEXT: [[TMP15:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 128 -// CHECK11-NEXT: [[TMP17:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP17]], i64 0, i64 0 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[TMP18:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: [[TMP19:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) -// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP19]], i64 0, i64 0 -// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP20:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 128 +// CHECK11-NEXT: 
[[TMP22:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP22]], i64 0, i64 0 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[TMP23:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP24:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP24]], i64 0, i64 0 +// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK11-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 128 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 128 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void 
@__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) // CHECK11-NEXT: ret void // // @@ -1459,7 +1512,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP1]], ptr [[F]], align 4 @@ -1483,7 +1536,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) // CHECK11-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 128 @@ -1509,31 +1562,33 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 1 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK13-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK13-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK13-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK13-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK13-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK13-NEXT: 
[[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK13-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK13-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK13: copyin.not.master: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK13-NEXT: store volatile i32 [[TMP5]], ptr [[TMP1]], align 128 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK13-NEXT: store volatile i32 [[TMP7]], ptr [[TMP3]], align 128 // CHECK13-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK13: copyin.not.master.end: -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP8:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK13-NEXT: store volatile i32 1, ptr [[TMP8]], align 128 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK13-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK13-NEXT: [[TMP10:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK13-NEXT: store volatile i32 1, ptr [[TMP10]], align 128 // CHECK13-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK13-NEXT: ret void // @@ -1559,40 +1614,45 @@ // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK14-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK14-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK14-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK14-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK14-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK14-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK14-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK14-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK14: copyin.not.master: -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK14-NEXT: store volatile i32 [[TMP5]], ptr [[TMP1]], align 128 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK14-NEXT: store volatile i32 [[TMP7]], ptr [[TMP3]], align 128 // CHECK14-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK14: copyin.not.master.end: -// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK14-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK14-NEXT: [[TMP8:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK14-NEXT: store volatile i32 1, ptr [[TMP8]], align 128 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr @__block_literal_global.2, i32 0, i32 3), align 8 -// CHECK14-NEXT: call void [[TMP9]](ptr noundef @__block_literal_global.2) +// CHECK14-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK14-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK14-NEXT: [[TMP10:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK14-NEXT: store volatile i32 1, ptr [[TMP10]], align 128 +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr 
@__block_literal_global.2, i32 0, i32 3), align 8 +// CHECK14-NEXT: call void [[TMP11]](ptr noundef @__block_literal_global.2) // CHECK14-NEXT: ret void // // @@ -1617,6 +1677,7 @@ // CHECK15-LABEL: define {{[^@]+}}@_Z10array_funcv // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK15-NEXT: [[TMP0:%.*]] = load i8, ptr @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK15-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] @@ -1633,9 +1694,13 @@ // CHECK15-NEXT: store i8 1, ptr @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: br label [[INIT_END]] // CHECK15: init.end: -// CHECK15-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) -// CHECK15-NEXT: [[TMP3:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP2]], ptr [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) +// CHECK15-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) +// CHECK15-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // @@ -1676,44 +1741,45 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[S:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK15-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) -// CHECK15-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK15-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK15-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -// CHECK15-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK15-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) +// CHECK15-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK15-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK15-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: br i1 [[TMP8]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK15: copyin.not.master: -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK15-NEXT: [[TMP6:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP6]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK15-NEXT: [[TMP9:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP9]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] // CHECK15-NEXT: br i1 
[[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(8) ptr @_ZN2StaSERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] // CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK15: omp.arraycpy.done1: // CHECK15-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK15: copyin.not.master.end: -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]]) // CHECK15-NEXT: ret void // // @@ -1762,49 +1828,58 @@ // CHECK16-LABEL: define {{[^@]+}}@_Z3foov // CHECK16-SAME: () #[[ATTR3:[0-9]+]] { // CHECK16-NEXT: entry: -// CHECK16-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK16-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK16-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK16-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK16-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK16-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK16-NEXT: [[TMP0:%.*]] = call ptr @_ZTW1t() -// CHECK16-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK16-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP2:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK16-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T:%.*]]) #[[ATTR4]] { +// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK16-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CHECK16-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK16-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK16-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 -// CHECK16-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK16-NEXT: [[TMP1:%.*]] = call ptr @_ZTW1t() -// CHECK16-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK16-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK16-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK16-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK16-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK16-NEXT: [[TMP3:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK16-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK16-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK16-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK16: copyin.not.master: -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK16-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 4 // CHECK16-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK16: copyin.not.master.end: -// CHECK16-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK16-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK16-NEXT: [[TMP8:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK16-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK16-NEXT: call void @__kmpc_barrier(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK16-NEXT: [[TMP10:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK16-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK16-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK16-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_copyin_combined_codegen.c b/clang/test/OpenMP/parallel_copyin_combined_codegen.c --- a/clang/test/OpenMP/parallel_copyin_combined_codegen.c +++ b/clang/test/OpenMP/parallel_copyin_combined_codegen.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2 // RUN: %clang_cc1 -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics @@ -52,481 +53,528 @@ a[i] = i + x; } -// CHECK-LABEL: define {{[^@]+}}@test_omp_parallel_copyin +// CHECK-LABEL: define dso_local void @test_omp_parallel_copyin // CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[TMP1]]) -// CHECK-NEXT: ret void +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -// CHECK-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] -// CHECK: copyin.not.master: -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] -// CHECK: copyin.not.master.end: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]]) -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 -// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK: cond.true: -// CHECK-NEXT: br label [[COND_END:%.*]] -// CHECK: cond.false: -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: br label [[COND_END]] -// CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK: omp.body.continue: -// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK: omp.inner.for.end: -// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]]) -// CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@test_omp_parallel_for_copyin -// CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[TMP1]]) -// CHECK-NEXT: ret void +// CHECK-LABEL: define internal void @.omp_outlined. 
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +// CHECK-NEXT: br i1 [[TMP8]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK: copyin.not.master: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP5]], align 4 +// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] +// CHECK: copyin.not.master.end: +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]]) +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP22]] +// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) +// CHECK-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], 
align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -// CHECK-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] -// CHECK: copyin.not.master: -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] -// CHECK: copyin.not.master.end: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]]) -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 -// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK: cond.true: -// CHECK-NEXT: br label [[COND_END:%.*]] -// CHECK: cond.false: -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: br label [[COND_END]] -// CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK: omp.body.continue: -// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK: 
omp.inner.for.inc: -// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK: omp.inner.for.end: -// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]]) -// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@test_omp_parallel_for_simd_copyin -// CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: store i32 3, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[TMP1]]) -// CHECK-NEXT: ret void +// CHECK-LABEL: define dso_local void @test_omp_parallel_for_copyin +// CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -// CHECK-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] -// CHECK: copyin.not.master: -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] -// CHECK: copyin.not.master.end: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]]) -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 -// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK: cond.true: -// CHECK-NEXT: br label [[COND_END:%.*]] -// CHECK: cond.false: -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: br label [[COND_END]] -// CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 
[[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group -// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group -// CHECK-NEXT: [[TMP18:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !llvm.access.group -// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP19]] -// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group -// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group -// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK: omp.body.continue: -// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK: omp.inner.for.end: -// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK: omp.loop.exit: -// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]]) -// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK: .omp.final.then: -// CHECK-NEXT: store i32 100, ptr [[I]], align 4 -// CHECK-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK: .omp.final.done: -// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@test_omp_parallel_sections_copyin -// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: store i32 4, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[TMP1]]) -// CHECK-NEXT: ret void +// CHECK-LABEL: define internal void @.omp_outlined..1 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +// CHECK-NEXT: br i1 [[TMP8]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK: copyin.not.master: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP5]], align 4 +// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] +// CHECK: copyin.not.master.end: +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP22]] +// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) +// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] -// CHECK-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] -// CHECK: copyin.not.master: -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 4 -// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] -// CHECK: copyin.not.master.end: -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 -// CHECK-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP12]], 1 -// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 1 -// CHECK-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK-NEXT: switch i32 [[TMP18]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ -// CHECK-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] -// CHECK-NEXT: ] -// CHECK: .omp.sections.case: -// CHECK-NEXT: [[TMP19:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP21]], align 4 -// CHECK-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK: .omp.sections.case1: -// CHECK-NEXT: [[TMP22:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 -// CHECK-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK: .omp.sections.exit: -// CHECK-NEXT: 
br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK: omp.inner.for.end: -// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4:[0-9]+]], i32 [[TMP27]]) -// CHECK-NEXT: ret void // -// CHECK-LABEL: define {{[^@]+}}@test_omp_parallel_master_copyin -// CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: store i32 5, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[TMP1]]) -// CHECK-NEXT: ret void +// CHECK-LABEL: define dso_local void @test_omp_parallel_for_simd_copyin +// CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store i32 3, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined..2 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +// CHECK-NEXT: br i1 [[TMP8]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK: copyin.not.master: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP5]], align 4 +// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] +// CHECK: copyin.not.master.end: +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 99 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 
4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP21:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP22]] +// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) +// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK: .omp.final.then: +// CHECK-NEXT: store i32 100, ptr [[I]], align 4 +// CHECK-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK: .omp.final.done: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @test_omp_parallel_sections_copyin +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store i32 4, ptr 
[[TMP0]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: [[TMP4:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined..3 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP7]] to i64 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP8]], [[TMP9]] +// CHECK-NEXT: br i1 [[TMP10]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK: copyin.not.master: +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP7]], align 4 +// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] +// CHECK: copyin.not.master.end: +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = icmp slt i32 [[TMP16]], 1 +// CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 1 +// CHECK-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK-NEXT: switch i32 [[TMP22]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] +// CHECK-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] +// CHECK-NEXT: ] +// CHECK: .omp.sections.case: +// CHECK-NEXT: [[TMP23:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP25]], align 4 +// CHECK-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK: .omp.sections.case1: +// CHECK-NEXT: [[TMP26:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP28]], align 4 +// CHECK-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] +// CHECK: .omp.sections.exit: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP31]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define dso_local void @test_omp_parallel_master_copyin +// CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store i32 5, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define internal void @.omp_outlined..4 +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +// CHECK-NEXT: br i1 [[TMP8]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK: copyin.not.master: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[TMP5]], align 4 +// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] +// CHECK: copyin.not.master.end: +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB3]], i32 [[TMP13]]) +// CHECK-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK-NEXT: br i1 [[TMP15]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], 100 +// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP19]] +// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] +// CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK: for.end: +// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB3]], i32 [[TMP13]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define weak_odr hidden ptr @_ZTW1x +// CHECK-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) +// CHECK-NEXT: ret ptr [[TMP1]] // -// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -// CHECK-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] -// CHECK: copyin.not.master: -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 4 -// CHECK-NEXT: br label [[COPYIN_NOT_MASTER_END]] -// CHECK: copyin.not.master.end: -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]]) -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) -// CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -// CHECK: omp_if.then: -// CHECK-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK-NEXT: br label [[FOR_COND:%.*]] -// CHECK: for.cond: -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP13]], 100 -// CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] -// CHECK: for.body: -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[TMP15:%.*]] = call align 4 ptr 
@llvm.threadlocal.address.p0(ptr align 4 @x) -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP16]] -// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[IDXPROM]] -// CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -// CHECK-NEXT: br label [[FOR_INC:%.*]] -// CHECK: for.inc: -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 -// CHECK-NEXT: br label [[FOR_COND]] -// CHECK: for.end: -// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) -// CHECK-NEXT: br label [[OMP_IF_END:%.*]] -// CHECK: omp_if.end: -// CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -249,10 +249,9 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr nonnull align 4 dereferenceable(28) [[SS]], ptr nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) @@ -263,34 +262,40 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[VAR]], float 3.000000e+00) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[VEC]], i32 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP7]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP10]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -330,66 +335,70 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK1-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -439,21 +448,26 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i32 4, ptr inttoptr 
(i32 1 to ptr)) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i32 4, ptr inttoptr (i32 1 to ptr)) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTT_VAR__VOID_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i32 1 to ptr)) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i32 1 to ptr)) // CHECK1-NEXT: ret void // // @@ -461,46 +475,52 @@ // CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// 
CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], i32 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 128 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i32 [[TMP3]]) +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP5]] +// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP8]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -512,9 +532,7 @@ // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C7:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[E:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 @@ -541,65 +559,76 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[C7]], align 4 // CHECK1-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: store ptr [[E9]], ptr [[E]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], ptr [[TMP10]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 4 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E3]], ptr align 4 [[TMP2]], i32 16, i1 false) -// CHECK1-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 -// CHECK1-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 
[[TMP7]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E]], ptr align 4 [[TMP11]], i32 16, i1 false) +// CHECK1-NEXT: store ptr [[E]], ptr [[_TMP3]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP14]], -1 +// CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP17]], i32 0, i32 2 // CHECK1-NEXT: store i32 1111, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -721,63 +750,65 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: 
[[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 128 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i32 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i32 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -807,14 +838,19 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: ret void // // @@ -824,7 +860,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -835,39 +871,45 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 // CHECK1-NEXT: [[A3:%.*]] = getelementptr 
inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A3]], ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[THIS1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP5]], align 4 // CHECK1-NEXT: ret void // // @@ -879,7 +921,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: store i32 [[A]], ptr 
[[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -897,9 +939,9 @@ // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK1-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -953,9 +995,7 @@ // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C7:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[E:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 @@ -982,63 +1022,74 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[C7]], align 4 // CHECK3-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: store ptr [[E9]], ptr [[E]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], ptr [[TMP10]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E3]], ptr align 4 [[TMP2]], i32 16, i1 false) -// CHECK3-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E]], ptr align 4 [[TMP11]], i32 16, i1 false) +// CHECK3-NEXT: store ptr [[E]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, 
ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -1047,9 +1098,7 @@ // CHECK3-SAME: (ptr nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1069,77 +1118,94 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], i32 [[TMP14]], i32 [[TMP18]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 
[[INC]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP6]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -1189,34 +1255,40 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 -// CHECK4-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i32 [[TMP1]], i32 [[TMP3]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK4-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1228,14 +1300,14 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 16 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP5]], ptr [[BLOCK_CAPTURED]], align 128 // CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK4-NEXT: call void [[TMP3]](ptr [[BLOCK]]) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK4-NEXT: call void [[TMP8]](ptr [[BLOCK]]) // CHECK4-NEXT: ret void // // @@ -1262,9 +1334,7 @@ // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C7:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[E:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 @@ -1291,53 +1361,64 @@ // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C7]], align 4 // CHECK4-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK4-NEXT: store ptr [[E9]], ptr [[E]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], ptr [[TMP10]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 4 +// CHECK4-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK4-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK4-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E3]], ptr align 4 [[TMP2]], i32 16, i1 false) -// CHECK4-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E]], ptr align 4 [[TMP11]], i32 16, i1 false) +// CHECK4-NEXT: store ptr [[E]], ptr [[_TMP3]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 4 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1349,19 +1430,19 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED]], align 4 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[BLOCK_CAPTURED5]], align 4 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK4-NEXT: store ptr [[TMP5]], ptr [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK4-NEXT: call void [[TMP7]](ptr [[BLOCK]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK4-NEXT: store ptr [[TMP12]], ptr [[BLOCK_CAPTURED]], align 4 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED4]], align 4 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK4-NEXT: store ptr [[TMP14]], ptr [[BLOCK_CAPTURED5]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK4-NEXT: call void 
[[TMP16]](ptr [[BLOCK]]) // CHECK4-NEXT: ret void // // @@ -1370,9 +1451,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -1391,55 +1470,65 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], i32 [[TMP7]], i32 [[TMP9]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// 
CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK4-NEXT: ret void // // @@ -1453,10 +1542,9 @@ // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN2SSC1ERi(ptr nonnull align 8 dereferenceable(32) [[SS]], ptr nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) @@ -1467,34 +1555,40 @@ // CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float 2.000000e+00) // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[VAR]], float 3.000000e+00) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[VEC]], i64 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR_CASTED1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[T_VAR_CASTED1]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK9-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v() // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP7]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP10]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -1534,66 +1628,70 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done3: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done8: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -1643,21 +1741,26 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 4, ptr inttoptr 
(i64 1 to ptr)) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i64 4, ptr inttoptr (i64 1 to ptr)) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTT_VAR__VOID_ADDR]], align 4 -// CHECK9-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i64 1 to ptr)) +// CHECK9-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i64 1 to ptr)) // CHECK9-NEXT: ret void // // @@ -1665,46 +1768,52 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK9-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2]], align 128 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// 
CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], i64 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 128 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR_CASTED1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[T_VAR_CASTED1]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i64 [[TMP3]]) +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP5]] +// 
CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP8]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1716,9 +1825,7 @@ // CHECK9-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[C7:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[E:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1745,65 +1852,76 @@ // CHECK9-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 // CHECK9-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK9-NEXT: store ptr [[E9]], ptr [[E]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], ptr [[TMP10]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[E3:%.*]] = alloca [4 x i32], align 16 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E3]], ptr align 4 [[TMP2]], i64 16, i1 false) -// CHECK9-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 -// CHECK9-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 -// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E]], ptr align 4 [[TMP11]], i64 16, i1 false) +// CHECK9-NEXT: store ptr [[E]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP14]], -1 +// CHECK9-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 1 +// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP15]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP17]], i64 0, i64 2 // CHECK9-NEXT: store i32 1111, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -1925,63 +2043,65 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 // CHECK9-NEXT: 
[[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done3: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done8: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -2011,14 +2131,19 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 // CHECK9-NEXT: ret void // // @@ -2028,7 +2153,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -2039,39 +2164,45 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // 
CHECK9-NEXT: store i32 0, ptr [[A]], align 4 // CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[THIS1]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP5]], align 4 // CHECK9-NEXT: ret void // // @@ -2083,7 +2214,7 @@ // CHECK9-NEXT: 
store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2101,9 +2232,9 @@ // CHECK9-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK9-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2157,9 +2288,7 @@ // CHECK11-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C7:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[E:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2186,63 +2315,74 @@ // CHECK11-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 // CHECK11-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK11-NEXT: store ptr [[E9]], ptr [[E]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], ptr [[TMP10]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[E3:%.*]] = alloca [4 x i32], align 16 -// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[E]], 
ptr [[E_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK11-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E3]], ptr align 4 [[TMP2]], i64 16, i1 false) -// CHECK11-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP6]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK11-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E]], ptr align 4 [[TMP11]], i64 16, i1 false) +// CHECK11-NEXT: store ptr [[E]], ptr [[_TMP3]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK11-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B]], ptr [[TMP15]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 // CHECK11-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK11-NEXT: ret void // @@ -2251,9 +2391,7 @@ // CHECK11-SAME: (ptr nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -2273,77 +2411,94 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add 
nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK11-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK11-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK11-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK11-NEXT: store i32 1, ptr 
[[G_ADDR]], align 4 -// CHECK11-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK11-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK11-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[G]], ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[SIVAR]], ptr [[TMP6]], align 8 // CHECK11-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK11-NEXT: ret void // @@ -2393,34 +2548,40 @@ // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 -// CHECK12-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK12-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP3]]) +// CHECK12-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK12-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, align 128 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK12-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK12-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK12-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2432,14 +2593,14 @@ // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK12-NEXT: [[TMP5:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK12-NEXT: store volatile i32 [[TMP5]], ptr [[BLOCK_CAPTURED]], align 128 // CHECK12-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 32 -// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK12-NEXT: call void [[TMP3]](ptr [[BLOCK]]) +// 
CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[BLOCK_CAPTURED1]], align 32 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK12-NEXT: call void [[TMP8]](ptr [[BLOCK]]) // CHECK12-NEXT: ret void // // @@ -2466,9 +2627,7 @@ // CHECK12-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[C7:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[E:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK12-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2495,53 +2654,64 @@ // CHECK12-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 // CHECK12-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK12-NEXT: store ptr [[E9]], ptr [[E]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], ptr [[TMP10]]) +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK12-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK12-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR2]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[E3:%.*]] = alloca [4 x i32], align 16 -// CHECK12-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK12-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, align 8 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK12-NEXT: 
store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK12-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK12-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK12-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK12-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E3]], ptr align 4 [[TMP2]], i64 16, i1 false) -// CHECK12-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 8 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK12-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK12-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK12-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK12-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E]], ptr align 4 [[TMP11]], i64 16, i1 false) +// CHECK12-NEXT: store ptr [[E]], ptr [[_TMP3]], align 8 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2553,19 +2723,19 @@ // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK12-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK12-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK12-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK12-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[BLOCK_CAPTURED5]], align 8 -// CHECK12-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK12-NEXT: store ptr [[TMP5]], ptr [[BLOCK_CAPTURED6]], align 8 -// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK12-NEXT: call void [[TMP7]](ptr [[BLOCK]]) +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK12-NEXT: store ptr [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK12-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK12-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 +// CHECK12-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK12-NEXT: store ptr [[TMP14]], ptr [[BLOCK_CAPTURED5]], align 8 +// CHECK12-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK12-NEXT: call void [[TMP16]](ptr [[BLOCK]]) // CHECK12-NEXT: ret void // // @@ -2574,9 +2744,7 @@ // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -2595,55 +2763,65 @@ // CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK12-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 8 +// CHECK12-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 8 -// CHECK12-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 8 +// CHECK12-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[C_CASTED]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP12]]) +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[TMP11]], align 8 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR2]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK12-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK12-NEXT: [[INC:%.*]] = 
add nsw i32 [[TMP2]], 1 -// CHECK12-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK12-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK12-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK12-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK12-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK12-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK12-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK12-NEXT: ret void // // @@ -2657,6 +2835,7 @@ // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2673,63 +2852,74 @@ // CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP7]], align 128 // CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR0]], align 8 // CHECK17-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 8, ptr @.omp_outlined., ptr [[TMP8]], ptr [[N_ADDR]], i64 [[TMP1]], ptr [[TMP9]], ptr [[TMP10]], i64 [[TMP3]], i64 [[TMP5]], ptr [[VLA]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP3]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP19]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[S:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[A:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = call ptr @llvm.stacksave() -// CHECK17-NEXT: store ptr [[TMP5]], ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP6]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i64 [[TMP8]], i1 false) -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP9]], i64 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP10]], i32 [[TMP11]], ptr [[TMP12]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 
-// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP13]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() +// CHECK17-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP15]], align 128 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA]], ptr align 128 [[TMP13]], i64 [[TMP17]], i1 false) +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP18]], i64 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP19]], i32 [[TMP20]], ptr [[TMP21]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP22]]) // CHECK17-NEXT: ret void // // @@ -2743,6 +2933,7 @@ // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2764,73 +2955,84 @@ // CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 // CHECK17-NEXT: store i32 [[TMP8]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined..1, i64 [[TMP1]], ptr [[TMP9]], ptr [[THIS1]], i64 [[TMP3]], i64 [[TMP5]], ptr [[VLA]], ptr [[N_ADDR]], ptr [[TMP10]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP11]]) +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[THIS1]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[VLA]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP19]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[THIS:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr [[S:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8 -// CHECK17-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 127 -// CHECK17-NEXT: [[TMP9:%.*]] = udiv i64 [[TMP8]], 128 -// CHECK17-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 128 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: [[DOTVLA2__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP12]], i64 [[TMP10]], ptr inttoptr (i64 8 to ptr)) -// CHECK17-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[DOTVLA2__VOID_ADDR]], ptr align 128 [[TMP4]], i64 [[TMP14]], i1 false) -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[A]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP15]] to double -// CHECK17-NEXT: [[TMP16:%.*]] = mul nsw i64 1, [[TMP3]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[DOTVLA2__VOID_ADDR]], i64 [[TMP16]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 +// CHECK17-NEXT: [[TMP17:%.*]] = add nuw i64 [[TMP16]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = udiv i64 [[TMP17]], 128 +// CHECK17-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 128 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK17-NEXT: [[DOTVLA2__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP21]], i64 [[TMP19]], ptr inttoptr (i64 8 to ptr)) +// CHECK17-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[DOTVLA2__VOID_ADDR]], ptr align 128 [[TMP11]], i64 [[TMP23]], i1 false) +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 0 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[A]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to double +// CHECK17-NEXT: [[TMP25:%.*]] = mul nsw i64 1, [[TMP9]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[DOTVLA2__VOID_ADDR]], i64 [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK17-NEXT: store double 
[[CONV]], ptr [[ARRAYIDX7]], align 8 -// CHECK17-NEXT: [[CONV8:%.*]] = fpext double [[CONV]] to x86_fp80 -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK17-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP18]], i64 [[IDXPROM10]] -// CHECK17-NEXT: store x86_fp80 [[CONV8]], ptr [[ARRAYIDX11]], align 16 -// CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP12]], ptr [[DOTVLA2__VOID_ADDR]], ptr inttoptr (i64 8 to ptr)) +// CHECK17-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK17-NEXT: store double [[CONV]], ptr [[ARRAYIDX1]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to x86_fp80 +// CHECK17-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK17-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP27]], i64 [[IDXPROM4]] +// CHECK17-NEXT: store x86_fp80 [[CONV2]], ptr [[ARRAYIDX5]], align 16 +// CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP21]], ptr [[DOTVLA2__VOID_ADDR]], ptr inttoptr (i64 8 to ptr)) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -245,24 +245,25 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store double 5.000000e+00, ptr [[A]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -276,90 +277,94 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK1-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK1-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK1-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 // CHECK1-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 // CHECK1-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 // CHECK1-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK1-NEXT: store i64 1, ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP12]], [[ADD7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK1-NEXT: [[CMP10:%.*]] = 
icmp ult i64 [[TMP17]], [[ADD9]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 // CHECK1-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] // CHECK1-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 // CHECK1-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -372,23 +377,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -398,79 +409,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], 
align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -481,23 +494,29 @@ // 
CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -507,79 +526,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -590,23 +611,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -616,96 +643,98 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr 
[[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -716,23 +745,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -742,65 +777,67 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 
[[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, 
!llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK1-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -818,23 +855,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -844,65 +887,67 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] 
= load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK1-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -922,26 +967,33 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[X]], align 4 // CHECK1-NEXT: store i32 0, ptr [[Y]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -957,21 +1009,23 @@ // CHECK1-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -979,85 +1033,85 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP13]], ptr [[I]], align 1 // CHECK1-NEXT: store i32 11, ptr [[X]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP16:%.*]] = 
load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK1-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK1-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK1-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK1-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK1-NEXT: store i32 [[CONV20]], ptr [[X8]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, 
!llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]] -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]] +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]] +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]] // CHECK1-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK1-NEXT: store i64 
[[ADD29]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1078,24 +1132,30 @@ // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[X]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1107,78 +1167,80 @@ // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: 
[[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 
[[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]] // CHECK1-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1203,7 +1265,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -1212,23 +1274,26 @@ // CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1240,106 +1305,110 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK1-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] -// CHECK1-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK1-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK1-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1 -// 
CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP29]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP25:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP30:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP26:%.*]] = extractvalue { ptr, i32 } [[TMP25]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP26]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[TMP31:%.*]] = extractvalue { ptr, i32 } [[TMP30]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP31]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -1355,24 +1424,25 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: store double 5.000000e+00, ptr [[A]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK2-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -1386,90 +1456,94 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK2-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK2-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK2-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 // CHECK2-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 // CHECK2-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 // CHECK2-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK2-NEXT: store i64 1, ptr [[I]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK2-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load 
i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK2-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP12]], [[ADD7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 // CHECK2-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] // CHECK2-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 // CHECK2-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -1482,23 +1556,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // 
CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1508,79 +1588,81 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// 
CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK2-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1591,23 +1673,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1617,79 +1705,81 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK2-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK2-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1700,23 +1790,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1726,96 +1822,98 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds 
float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK2-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1826,23 +1924,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1852,65 +1956,67 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK2-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 35, i64 0, i64 16908287, i64 1, i64 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 
[[TMP10]], i32 35, i64 0, i64 16908287, i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, 
!llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK2-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK2-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -1928,23 +2034,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1954,65 +2066,67 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK2-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 36, i64 0, i64 16908287, i64 1, i64 7) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 36, i64 0, i64 16908287, i64 1, i64 7) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, 
ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK2-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK2-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2032,26 +2146,33 @@ // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK2-NEXT: store i32 0, ptr [[X]], align 4 // CHECK2-NEXT: store i32 0, ptr [[Y]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2067,21 +2188,23 @@ // CHECK2-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK2-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: 
[[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK2-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -2089,85 +2212,85 @@ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: store i8 [[TMP13]], ptr [[I]], align 1 // CHECK2-NEXT: store i32 11, ptr [[X]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP12]], i32 38, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP18]], i32 38, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr 
[[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK2-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK2-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK2-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK2-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK2-NEXT: store i32 [[CONV20]], ptr [[X8]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group 
[[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]] -// CHECK2-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]] -// CHECK2-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK2-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]] +// CHECK2-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]] +// CHECK2-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]] +// CHECK2-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK2-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]] // CHECK2-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK2-NEXT: store i64 [[ADD29]], ptr 
[[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2188,24 +2311,30 @@ // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK2-NEXT: store i32 0, ptr [[X]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2217,78 +2346,80 @@ // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 37, i32 0, i32 199, i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 37, i32 0, i32 199, i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 
20 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK2-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK2-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] -// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK2-NEXT: 
[[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM9]] +// CHECK2-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]] // CHECK2-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2313,7 +2444,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -2322,23 +2453,26 @@ // CHECK2-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK2-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16 // CHECK2-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2350,106 +2484,110 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK2-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 -// CHECK2-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK2-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK2-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() +// CHECK2-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 +// CHECK2-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK2: omp.dispatch.cleanup: // CHECK2-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK2-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK2-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]] -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] -// CHECK2-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK2-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK2-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1 -// 
CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP29]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP25:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP30:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: [[TMP26:%.*]] = extractvalue { ptr, i32 } [[TMP25]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP26]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[TMP31:%.*]] = extractvalue { ptr, i32 } [[TMP30]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP31]]) #[[ATTR7:[0-9]+]] // CHECK2-NEXT: unreachable // // @@ -2465,24 +2603,25 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK5-NEXT: store double 5.000000e+00, ptr [[A]], align 8, !dbg [[DBG10:![0-9]+]] // CHECK5-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8, !dbg [[DBG11:![0-9]+]] // CHECK5-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8, !dbg [[DBG11]] // CHECK5-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !dbg [[DBG11]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]), !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG11]] // CHECK5-NEXT: ret void, !dbg [[DBG12:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG13:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -2496,91 +2635,95 @@ // CHECK5-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8, !dbg [[DBG14:![0-9]+]] -// CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]], !dbg [[DBG14]] -// CHECK5-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00, !dbg [[DBG14]] -// CHECK5-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00, !dbg [[DBG14]] -// CHECK5-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG14]] -// CHECK5-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 1, ptr [[I]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG15:![0-9]+]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG14:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8, !dbg [[DBG15:![0-9]+]] +// 
CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]], !dbg [[DBG15]] +// CHECK5-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00, !dbg [[DBG15]] +// CHECK5-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00, !dbg [[DBG15]] +// CHECK5-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 1, ptr [[I]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.precond.then: -// CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG15]] -// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG15]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]), !dbg [[DBG15]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]), !dbg [[DBG14]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr 
[[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG15]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG15]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG15]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP12]], [[ADD7]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 
[[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG16:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG15]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG15]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[TMP22]], align 4, !dbg [[DBG15]] -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]]), !dbg [[DBG15]] -// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP26]]), !dbg [[DBG14]] +// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG14]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void, !dbg [[DBG16]] // @@ -2592,23 +2735,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG22]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG22]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG22]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG22]] // CHECK5-NEXT: ret void, !dbg [[DBG23:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG24:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG24:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2618,79 +2767,81 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG25:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG25:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG25]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG26:![0-9]+]] // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25]] -// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG25]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423, !dbg [[DBG26]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG26]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG26]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG26]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG26]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG26]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7, !dbg [[DBG26]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]], !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG27:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG27]] -// 
CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG27:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]], !dbg [[DBG27]] // CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !dbg [[DBG27]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], 
!dbg [[DBG28:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG25]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG25]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG25]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG25]] // CHECK5-NEXT: ret void, !dbg [[DBG28]] // // @@ -2701,23 +2852,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB14:[0-9]+]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG31]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG31]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG31]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB14:[0-9]+]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG31]] // CHECK5-NEXT: ret void, !dbg [[DBG32:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG33:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG33:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2727,79 +2884,81 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG34]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG35:![0-9]+]] // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG34]] -// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB11:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG34]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB11:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423, !dbg [[DBG35]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG35]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG35]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG35]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG35]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG35]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7, !dbg [[DBG35]] // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]], !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG36:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG36]] -// 
CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG36:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]], !dbg [[DBG36]] // CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !dbg [[DBG36]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], 
!dbg [[DBG37:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG34]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG34]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB13:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG34]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB13:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG34]] // CHECK5-NEXT: ret void, !dbg [[DBG37]] // // @@ -2810,23 +2969,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB19:[0-9]+]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG40]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG40]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG40]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB19:[0-9]+]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG40]] // CHECK5-NEXT: ret void, !dbg [[DBG41:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG42:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG42:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2836,96 +3001,98 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG43]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44:![0-9]+]] // CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] -// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB16:[0-9]+]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG43]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB16:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG43]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288, !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG44]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG44]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG44]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG44]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]], !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]], !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg 
[[DBG43]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127, !dbg [[DBG44]] // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45:![0-9]+]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG45:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: 
[[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]], !dbg [[DBG45]] // CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG46:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1, !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB18:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG43]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB18:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG43]] // CHECK5-NEXT: ret void, !dbg [[DBG46]] // // @@ -2936,23 +3103,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr 
[[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB21:[0-9]+]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG50]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG50]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG50]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB21:[0-9]+]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG50]] // CHECK5-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG52:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG52:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -2962,65 +3135,67 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], 
ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG53]] // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG54:![0-9]+]] // CHECK5-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG54]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG53]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB21]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG53]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB21]], i32 [[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG53]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB21]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG53]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB21]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG53]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG53]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG54]] -// CHECK5-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG54]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: 
[[ADD:%.*]] = add i64 [[TMP14]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]], !dbg [[DBG54]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127, !dbg [[DBG54]] // CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG54]] // CHECK5-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP21:%.*]] 
= load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]], !dbg [[DBG56]] // CHECK5-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG57:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1, !dbg [[DBG54]] // CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG53]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3038,23 +3213,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB23:[0-9]+]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG62]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG62]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG62]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23:[0-9]+]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG62]] // CHECK5-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG64:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG64:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3064,65 +3245,67 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG65]] +// CHECK5-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG65]] // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG66:![0-9]+]] // CHECK5-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG66]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG65]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB23]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG65]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB23]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG65]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB23]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG65]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB23]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG65]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG65]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG66]] -// CHECK5-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG66]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1, !dbg [[DBG66]] +// 
CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]], !dbg [[DBG66]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127, !dbg [[DBG66]] // CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG66]] // CHECK5-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg 
[[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]], !dbg [[DBG68]] // CHECK5-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG69:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1, !dbg [[DBG66]] // CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG65]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3142,26 +3325,33 @@ // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[X]], align 4, !dbg [[DBG74:![0-9]+]] // CHECK5-NEXT: store i32 0, ptr [[Y]], align 4, !dbg [[DBG75:![0-9]+]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB25:[0-9]+]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB25:[0-9]+]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG76]] // CHECK5-NEXT: ret void, !dbg [[DBG77:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG78:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG78:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3177,21 +3367,23 @@ // CHECK5-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG80:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8, !dbg [[DBG80]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG80:![0-9]+]] +// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8, !dbg [[DBG80]] // CHECK5-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32, !dbg [[DBG80]] // CHECK5-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]], !dbg [[DBG80]] // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1, !dbg [[DBG80]] // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1, !dbg [[DBG80]] @@ -3199,85 +3391,85 @@ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11, !dbg [[DBG81:![0-9]+]] // CHECK5-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1, !dbg [[DBG81]] // CHECK5-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: store i8 [[TMP7]], ptr [[I]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: store i8 [[TMP13]], ptr [[I]], align 1, !dbg [[DBG80]] // CHECK5-NEXT: store i32 11, ptr [[X]], align 4, !dbg [[DBG81]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32, !dbg [[DBG80]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57, !dbg [[DBG80]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, 
ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] -// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG79]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB25]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1), !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB25]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1), !dbg [[DBG79]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB25]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG79]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB25]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG79]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0, !dbg [[DBG79]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82:![0-9]+]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]], !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]], !dbg [[DBG80]] // 
CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11, !dbg [[DBG80]] // CHECK5-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1, !dbg [[DBG80]] // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]], !dbg [[DBG80]] // CHECK5-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8, !dbg [[DBG80]] // CHECK5-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11, !dbg [[DBG80]] // CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11, !dbg [[DBG80]] -// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]], !dbg [[DBG80]] +// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]], !dbg [[DBG80]] // CHECK5-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1, !dbg [[DBG81]] // CHECK5-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]], !dbg [[DBG81]] // CHECK5-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32, !dbg [[DBG81]] // CHECK5-NEXT: store i32 [[CONV20]], ptr [[X8]], align 4, !dbg [[DBG81]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG83:![0-9]+]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// 
CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG83:![0-9]+]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]], !dbg [[DBG83]] +// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]], !dbg [[DBG83]] // CHECK5-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG84:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group 
[[ACC_GRP82]] -// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1, !dbg [[DBG80]] // CHECK5-NEXT: store i64 [[ADD29]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG79]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3298,24 +3490,30 @@ // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[X]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB27:[0-9]+]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG90]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG90]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG90]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB27:[0-9]+]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG90]] // CHECK5-NEXT: ret void, !dbg [[DBG91:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG92:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG92:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3327,78 +3525,80 @@ // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG93:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG93:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG93]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG94:![0-9]+]] // CHECK5-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG94]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB27]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG93]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB27]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG93]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB27]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG93]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB27]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG93]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG93]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG94]] -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]], !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG94]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20, !dbg [[DBG94]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1, !dbg [[DBG94]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]], !dbg [[DBG94]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8, !dbg [[DBG94]] // CHECK5-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20, !dbg [[DBG94]] +// 
CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20, !dbg [[DBG94]] // CHECK5-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20, !dbg [[DBG94]] -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]], !dbg [[DBG94]] +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]], !dbg [[DBG94]] // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1, !dbg [[DBG96:![0-9]+]] // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]], !dbg [[DBG96]] // CHECK5-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !dbg [[DBG96]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// 
CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM9]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]], !dbg [[DBG97]] // CHECK5-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG98:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG94]] // CHECK5-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3423,7 +3623,7 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] @@ -3432,23 +3632,26 @@ // CHECK5-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG105]] // CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG105]] // CHECK5-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG105]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG106:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP3]], ptr 
[[N_CASTED]], align 4, !dbg [[DBG106]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8, !dbg [[DBG106]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB32:[0-9]+]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]), !dbg [[DBG106]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG107:![0-9]+]] -// CHECK5-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]), !dbg [[DBG107]] -// CHECK5-NEXT: ret void, !dbg [[DBG107]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG106:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG106]] +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG107:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB32:[0-9]+]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG108:![0-9]+]] +// CHECK5-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]), !dbg [[DBG108]] +// CHECK5-NEXT: ret void, !dbg [[DBG108]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 !dbg [[DBG108:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 !dbg [[DBG109:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3460,107 +3663,111 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110:![0-9]+]] -// CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// 
CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG109]] -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG109]] -// CHECK5-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB29:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG109]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG110:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[N]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111:![0-9]+]] +// CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG110]] +// CHECK5-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16, !dbg [[DBG110]] +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB29:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG110]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288, !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288, !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG111]] // CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG110]] +// 
CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG111]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG111]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]], !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]], !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.cleanup: -// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]], !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]], !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.cond.cleanup: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG111:![0-9]+]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG112:![0-9]+]] // CHECK5: invoke.cont: -// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]], !dbg [[DBG111]] -// CHECK5-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG111]] +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG112]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64, !dbg [[DBG112]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]], !dbg [[DBG112]] +// CHECK5-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG112]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP112:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG110]], !llvm.loop [[LOOP113:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP113:![0-9]+]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG110]], !llvm.loop [[LOOP114:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB31:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]), !dbg [[DBG109]] -// CHECK5-NEXT: ret void, !dbg [[DBG111]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB31:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: call void @llvm.stackrestore(ptr [[TMP29]]), !dbg [[DBG110]] +// CHECK5-NEXT: ret void, !dbg [[DBG112]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP25:%.*]] = landingpad { ptr, i32 } -// CHECK5-NEXT: catch ptr null, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP26:%.*]] = extractvalue { ptr, i32 } [[TMP25]], 0, !dbg [[DBG111]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP26]]) #[[ATTR7:[0-9]+]], !dbg [[DBG111]] -// CHECK5-NEXT: unreachable, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP30:%.*]] = 
landingpad { ptr, i32 } +// CHECK5-NEXT: catch ptr null, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP31:%.*]] = extractvalue { ptr, i32 } [[TMP30]], 0, !dbg [[DBG112]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP31]]) #[[ATTR7:[0-9]+]], !dbg [[DBG112]] +// CHECK5-NEXT: unreachable, !dbg [[DBG112]] // // // CHECK5-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -3575,24 +3782,25 @@ // CHECK6-NEXT: entry: // CHECK6-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK6-NEXT: store double 5.000000e+00, ptr [[A]], align 8 // CHECK6-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK6-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -3606,90 +3814,94 @@ // CHECK6-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8 -// CHECK6-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK6-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 +// CHECK6-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK6-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK6-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK6-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK6-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 // CHECK6-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 // CHECK6-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 // CHECK6-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK6-NEXT: store i64 1, ptr [[I]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK6-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK6-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK6: omp.precond.then: // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK6-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK6-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1 -// CHECK6-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP12]], [[ADD7]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK6-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1 -// CHECK6-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK6-NEXT: [[CMP10:%.*]] = 
icmp ult i64 [[TMP17]], [[ADD9]] // CHECK6-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] // CHECK6-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1 +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 // CHECK6-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK6-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]] +// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK6-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] // CHECK6-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK6-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK6-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] // CHECK6-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void @@ -3702,23 +3914,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3728,79 +3946,81 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], 
align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK6-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK6-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK6-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK6-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -3811,23 +4031,29 @@ // 
CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3837,79 +4063,81 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK6-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK6-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK6-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK6-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK6-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -3920,23 +4148,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3946,96 +4180,98 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds 
float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] +// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK6-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK6-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK6-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK6-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK6-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK6-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 
-// CHECK6-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK6-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK6-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -4046,23 +4282,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4072,65 +4314,67 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK6-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 
[[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK6-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, 
!llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK6-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK6-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4148,23 +4392,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4174,65 +4424,67 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK6-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK6-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] 
= load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK6-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK6-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] // 
CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4252,26 +4504,33 @@ // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK6-NEXT: store i32 0, ptr [[X]], align 4 // CHECK6-NEXT: store i32 0, ptr [[Y]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4287,21 +4546,23 @@ // CHECK6-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK6-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: 
[[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK6-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK6-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK6-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK6-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -4309,85 +4570,85 @@ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK6-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK6-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK6-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: store i8 [[TMP13]], ptr [[I]], align 1 // CHECK6-NEXT: store i32 11, ptr [[X]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK6: omp.precond.then: // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP16:%.*]] = 
load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK6-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK6-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK6-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK6-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK6-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK6-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK6-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK6-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK6-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK6-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK6-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK6-NEXT: store i32 [[CONV20]], ptr [[X8]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP24:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, 
!llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK6-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]] -// CHECK6-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK6-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK6-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]] -// CHECK6-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK6-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK6-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]] +// CHECK6-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK6-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]] +// CHECK6-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK6-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK6-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]] +// CHECK6-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK6-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK6-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]] // CHECK6-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK6-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK6-NEXT: store i64 
[[ADD29]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4408,24 +4669,30 @@ // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK6-NEXT: store i32 0, ptr [[X]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4437,78 +4704,80 @@ // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: 
[[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK6-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK6-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK6-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK6-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] -// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK6-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 
[[IDXPROM]] +// CHECK6-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM9]] +// CHECK6-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK6-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]] // CHECK6-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK6-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK6-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4533,7 +4802,7 @@ // CHECK6-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -4542,23 +4811,26 @@ // CHECK6-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK6-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16 // CHECK6-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]) +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK6-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4570,98 +4842,102 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK6-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() -// CHECK6-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 -// CHECK6-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK6-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK6-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() +// CHECK6-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 +// CHECK6-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK6-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK6: omp.dispatch.cleanup: // CHECK6-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// 
CHECK6-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK6-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3foov() // CHECK6-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK6-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]] -// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK6-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] -// CHECK6-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4 +// CHECK6-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK6-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK6-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1 -// CHECK6-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 
+// CHECK6-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK6-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK6-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK6-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK6-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK6-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP29]]) // CHECK6-NEXT: ret void // // @@ -4780,17 +5056,20 @@ // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARR:%.*]] = alloca [10 x i32], align 16 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK11-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARR]], i8 0, i64 40, i1 false) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[ARR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[ARR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 @@ -4807,22 +5086,24 @@ // CHECK11-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP3]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 10 // CHECK11-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 0 // CHECK11-NEXT: store ptr [[ARRAYDECAY1]], ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__END1]], align 8 +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP7]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK11-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 
[[SUB_PTR_SUB]], 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 @@ -4830,65 +5111,65 @@ // CHECK11-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 // CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i64 [[DIV]], 1 // CHECK11-NEXT: store i64 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[__BEGIN1]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ult ptr [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[__BEGIN1]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ult ptr [[TMP9]], [[TMP10]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, 
ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP20]], 1 -// CHECK11-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[MUL]] +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP22]], 1 +// CHECK11-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[MUL]] // CHECK11-NEXT: store ptr [[ADD_PTR8]], ptr [[__BEGIN15]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[__BEGIN15]], align 8 -// CHECK11-NEXT: store ptr [[TMP21]], ptr [[A]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[__BEGIN15]], align 8 +// CHECK11-NEXT: store ptr [[TMP23]], ptr [[A]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[A]], align 8 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP23]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP25]], 1 // CHECK11-NEXT: store i64 [[ADD9]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -4898,17 +5179,20 @@ // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARR:%.*]] = alloca [10 x i32], align 16 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARR]], i8 0, i64 40, i1 false) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[ARR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[ARR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -4933,41 +5217,43 @@ // CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP3]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 10 // CHECK11-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[__RANGE2]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[__RANGE2]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY2]], i64 10 // CHECK11-NEXT: store ptr [[ADD_PTR3]], ptr [[__END2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP3]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP5]], i64 0, i64 0 // 
CHECK11-NEXT: store ptr [[ARRAYDECAY4]], ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP5]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[__END1]], align 8 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP7]], i64 0, i64 0 // CHECK11-NEXT: store ptr [[ARRAYDECAY7]], ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP10]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK11-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST10:%.*]] = ptrtoint ptr [[TMP9]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST10:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST11:%.*]] = ptrtoint ptr [[TMP12]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB12:%.*]] = sub i64 [[SUB_PTR_LHS_CAST10]], [[SUB_PTR_RHS_CAST11]] // CHECK11-NEXT: [[SUB_PTR_DIV13:%.*]] = sdiv exact i64 [[SUB_PTR_SUB12]], 4 // CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i64 [[SUB_PTR_DIV13]], 1 @@ -4976,84 +5262,84 @@ // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[DIV]], [[DIV16]] // CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB17]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: store ptr [[TMP11]], ptr [[__BEGIN1]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: store ptr [[TMP12]], ptr [[__BEGIN2]], align 8 // CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ult 
ptr [[TMP13]], [[TMP14]] +// CHECK11-NEXT: store ptr [[TMP13]], ptr [[__BEGIN1]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: store ptr [[TMP14]], ptr [[__BEGIN2]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ult ptr [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[CMP18:%.*]] = icmp ult ptr [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[CMP18:%.*]] = icmp ult ptr [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP18]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: [[CMP21:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: [[CMP21:%.*]] = icmp sgt i64 [[TMP22]], [[TMP23]] // CHECK11-NEXT: br i1 [[CMP21]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: 
[[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP26]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP22:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP22:%.*]] = icmp sle i64 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST23:%.*]] = ptrtoint ptr [[TMP29]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST24:%.*]] = ptrtoint ptr [[TMP30]] to i64 +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST23:%.*]] = ptrtoint ptr [[TMP31]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST24:%.*]] = ptrtoint ptr [[TMP32]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB25:%.*]] = sub i64 [[SUB_PTR_LHS_CAST23]], [[SUB_PTR_RHS_CAST24]] // CHECK11-NEXT: [[SUB_PTR_DIV26:%.*]] = sdiv exact i64 [[SUB_PTR_SUB25]], 4 // CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i64 [[SUB_PTR_DIV26]], 1 // CHECK11-NEXT: [[ADD28:%.*]] = add nsw i64 [[SUB27]], 1 // CHECK11-NEXT: [[DIV29:%.*]] = sdiv i64 [[ADD28]], 1 // CHECK11-NEXT: [[MUL30:%.*]] = mul nsw i64 1, [[DIV29]] -// CHECK11-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP28]], [[MUL30]] +// CHECK11-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP30]], [[MUL30]] // CHECK11-NEXT: [[MUL32:%.*]] = mul nsw i64 [[DIV31]], 1 -// CHECK11-NEXT: [[ADD_PTR33:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[MUL32]] +// CHECK11-NEXT: [[ADD_PTR33:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[MUL32]] // CHECK11-NEXT: store ptr [[ADD_PTR33]], ptr [[__BEGIN119]], align 8 -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST34:%.*]] = ptrtoint ptr [[TMP34]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST35:%.*]] = ptrtoint ptr [[TMP35]] to i64 +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr 
[[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST34:%.*]] = ptrtoint ptr [[TMP36]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST35:%.*]] = ptrtoint ptr [[TMP37]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB36:%.*]] = sub i64 [[SUB_PTR_LHS_CAST34]], [[SUB_PTR_RHS_CAST35]] // CHECK11-NEXT: [[SUB_PTR_DIV37:%.*]] = sdiv exact i64 [[SUB_PTR_SUB36]], 4 // CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i64 [[SUB_PTR_DIV37]], 1 // CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[SUB38]], 1 // CHECK11-NEXT: [[DIV40:%.*]] = sdiv i64 [[ADD39]], 1 // CHECK11-NEXT: [[MUL41:%.*]] = mul nsw i64 1, [[DIV40]] -// CHECK11-NEXT: [[DIV42:%.*]] = sdiv i64 [[TMP33]], [[MUL41]] -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST43:%.*]] = ptrtoint ptr [[TMP36]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST44:%.*]] = ptrtoint ptr [[TMP37]] to i64 +// CHECK11-NEXT: [[DIV42:%.*]] = sdiv i64 [[TMP35]], [[MUL41]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST43:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST44:%.*]] = ptrtoint ptr [[TMP39]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB45:%.*]] = sub i64 [[SUB_PTR_LHS_CAST43]], [[SUB_PTR_RHS_CAST44]] // CHECK11-NEXT: [[SUB_PTR_DIV46:%.*]] = sdiv exact i64 [[SUB_PTR_SUB45]], 4 // CHECK11-NEXT: [[SUB47:%.*]] = sub nsw i64 [[SUB_PTR_DIV46]], 1 @@ -5061,32 +5347,32 @@ // CHECK11-NEXT: [[DIV49:%.*]] = sdiv i64 [[ADD48]], 1 // CHECK11-NEXT: [[MUL50:%.*]] = mul nsw i64 1, [[DIV49]] // CHECK11-NEXT: [[MUL51:%.*]] = mul nsw i64 [[DIV42]], [[MUL50]] -// CHECK11-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP32]], [[MUL51]] +// CHECK11-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP34]], [[MUL51]] // CHECK11-NEXT: [[MUL53:%.*]] = mul nsw i64 [[SUB52]], 1 -// CHECK11-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i32, ptr [[TMP31]], i64 [[MUL53]] +// CHECK11-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[MUL53]] // CHECK11-NEXT: store ptr [[ADD_PTR54]], ptr [[__BEGIN220]], align 8 -// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[__BEGIN119]], align 8 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[A]], align 8 -// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[__BEGIN220]], align 8 -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK11-NEXT: store i32 [[TMP40]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[__BEGIN119]], align 8 +// CHECK11-NEXT: store ptr [[TMP40]], ptr [[A]], align 8 +// CHECK11-NEXT: [[TMP41:%.*]] = load ptr, ptr [[__BEGIN220]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK11-NEXT: store i32 [[TMP42]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[TMP44]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD55:%.*]] = add nsw i64 
[[TMP43]], 1 +// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD55:%.*]] = add nsw i64 [[TMP45]], 1 // CHECK11-NEXT: store i64 [[ADD55]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp --- a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp +++ b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp @@ -38,128 +38,143 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[A]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A1:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A1]], i32 0, i32 1 -// CHECK1-NEXT: store i8 0, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A]], i32 0, i32 1 +// CHECK1-NEXT: store i8 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP11]], 5 -// CHECK1-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP13]], 5 +// CHECK1-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: -// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] -// CHECK1-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK1-NEXT: store i32 0, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp sle i32 [[TMP15]], [[TMP14]] +// CHECK1-NEXT: br i1 [[TMP16]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK1: lp_cond_then: -// CHECK1-NEXT: store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr @{{pl_cond[.].+[.|,]}} align 4 // CHECK1-NEXT: br label [[LP_COND_EXIT]] // CHECK1: lp_cond_exit: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP4]], i32 10) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[TMP2]], ptr [[I]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp sle i32 [[TMP17]], [[TMP16]] -// CHECK1-NEXT: br i1 [[TMP18]], label [[LP_COND_THEN4:%.*]], label [[LP_COND_EXIT5:%.*]] -// CHECK1: lp_cond_then4: -// CHECK1-NEXT: store i32 [[TMP16]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: br label [[LP_COND_EXIT5]] -// CHECK1: lp_cond_exit5: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp sle i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: br i1 [[TMP24]], label [[LP_COND_THEN6:%.*]], label [[LP_COND_EXIT7:%.*]] -// CHECK1: lp_cond_then6: -// CHECK1-NEXT: store i32 [[TMP22]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: br label [[LP_COND_EXIT7]] -// CHECK1: lp_cond_exit7: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP4]], i32 10) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, ptr [[TMP2]], ptr [[I]])
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP26]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i8 [[TMP27]], 0
-// CHECK1-NEXT: br i1 [[TMP28]], label [[LPC_THEN:%.*]], label [[LPC_DONE:%.*]]
+// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP6]], i32 10)
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[I]], ptr [[TMP19]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]])
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = icmp sle i32 [[TMP21]], [[TMP20]]
+// CHECK1-NEXT: br i1 [[TMP22]], label [[LP_COND_THEN3:%.*]], label [[LP_COND_EXIT4:%.*]]
+// CHECK1: lp_cond_then3:
+// CHECK1-NEXT: store i32 [[TMP20]], ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: store i32 [[TMP23]], ptr @{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: br label [[LP_COND_EXIT4]]
+// CHECK1: lp_cond_exit4:
+// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP24]] monotonic, align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = icmp sle i32 [[TMP27]], [[TMP26]]
+// CHECK1-NEXT: br i1 [[TMP28]], label [[LP_COND_THEN5:%.*]], label [[LP_COND_EXIT6:%.*]]
+// CHECK1: lp_cond_then5:
+// CHECK1-NEXT: store i32 [[TMP26]], ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: store i32 [[TMP29]], ptr @{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: br label [[LP_COND_EXIT6]]
+// CHECK1: lp_cond_exit6:
+// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP6]], i32 10)
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[I]], ptr [[TMP31]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_7]])
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i8 [[TMP33]], 0
+// CHECK1-NEXT: br i1 [[TMP34]], label [[LPC_THEN:%.*]], label [[LPC_DONE:%.*]]
// CHECK1: lpc.then:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = icmp sle i32 [[TMP30]], [[TMP29]]
-// CHECK1-NEXT: br i1 [[TMP31]], label [[LP_COND_THEN8:%.*]], label [[LP_COND_EXIT9:%.*]]
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = icmp sle i32 [[TMP36]], [[TMP35]]
+// CHECK1-NEXT: br i1 [[TMP37]], label [[LP_COND_THEN8:%.*]], label [[LP_COND_EXIT9:%.*]]
// CHECK1: lp_cond_then8:
-// CHECK1-NEXT: store i32 [[TMP29]], ptr @.{{pl_cond[.].+[.|,]}} align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: store i32 [[TMP32]], ptr @{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: store i32 [[TMP35]], ptr @.{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: store i32 [[TMP38]], ptr @{{pl_cond[.].+[.|,]}} align 4
// CHECK1-NEXT: br label [[LP_COND_EXIT9]]
// CHECK1: lp_cond_exit9:
-// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
+// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var)
// CHECK1-NEXT: br label [[LPC_DONE]]
// CHECK1: lpc.done:
// CHECK1-NEXT: br label [[IF_END]]
@@ -168,67 +183,68 @@
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP39]], 1
// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP4]])
-// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]])
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP6]])
+// CHECK1-NEXT: br i1 [[TMP41]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK1: .omp.lastprivate.then:
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr @{{pl_cond[.].+[.|,]}} align 4
-// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP2]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: store i32 [[TMP37]], ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr @{{pl_cond[.].+[.|,]}} align 4
+// CHECK1-NEXT: store i32 [[TMP42]], ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: store i32 [[TMP43]], ptr [[TMP2]], align 4
// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK1: .omp.lastprivate.done:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[I_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[A1]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP7]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK1-NEXT: store i32 [[ADD1]], ptr [[TMP2]], align 4
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP10]] monotonic, align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP13]] monotonic, align 4
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
@@ -255,21 +271,22 @@
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP2]] monotonic, align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], ptr [[TMP0]], i32 0, i32 1
-// CHECK1-NEXT: store atomic volatile i8 1, ptr [[TMP4]] unordered, align 1
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP5]] monotonic, align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], ptr [[TMP2]], i32 0, i32 1
+// CHECK1-NEXT: store atomic volatile i8 1, ptr [[TMP7]] unordered, align 1
// CHECK1-NEXT: ret void
//
diff --git a/clang/test/OpenMP/parallel_for_linear_codegen.cpp b/clang/test/OpenMP/parallel_for_linear_codegen.cpp
--- a/clang/test/OpenMP/parallel_for_linear_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_linear_codegen.cpp
@@ -93,17 +93,22 @@
// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TEST]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8
// CHECK1-NEXT: store i64 0, ptr [[LVAR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[PVAR]], ptr [[LVAR]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[LVAR]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
// CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4:[0-9]+]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK1-NEXT: ret i32 [[TMP0]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK1-NEXT: ret i32 [[TMP2]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -117,12 +122,11 @@
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[LVAR:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca ptr, align 8
@@ -132,88 +136,90 @@
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[PVAR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[LVAR3:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTLINEAR_START]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP1]], align 8
-// CHECK1-NEXT: store i64 [[TMP3]], ptr [[DOTLINEAR_START1]], align 8
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTLINEAR_START]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8
+// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTLINEAR_START1]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]])
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]])
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 3
-// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL5]] to i64
-// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDX_EXT]]
-// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR2]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 3
-// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL6]] to i64
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP14]], [[CONV]]
-// CHECK1-NEXT: store i64 [[ADD7]], ptr [[LVAR3]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8
-// CHECK1-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 3
-// CHECK1-NEXT: store ptr [[ADD_PTR8]], ptr [[PVAR2]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[LVAR3]], align 8
-// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP17]], 3
-// CHECK1-NEXT: store i64 [[ADD9]], ptr [[LVAR3]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 3
+// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDX_EXT]]
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 3
+// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL4]] to i64
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP17]], [[CONV]]
+// CHECK1-NEXT: store i64 [[ADD5]], ptr [[LVAR]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[PVAR]], align 8
+// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 3
+// CHECK1-NEXT: store ptr [[ADD_PTR6]], ptr [[PVAR]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[LVAR]], align 8
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP20]], 3
+// CHECK1-NEXT: store i64 [[ADD7]], ptr [[LVAR]], align 8
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
-// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
+// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
// CHECK1: .omp.linear.pu:
-// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PVAR2]], align 8
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[LVAR3]], align 8
-// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR]], align 8
+// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[LVAR]], align 8
+// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP4]], align 8
// CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]]
// CHECK1: .omp.linear.pu.done:
// CHECK1-NEXT: ret void
@@ -225,11 +231,16 @@
// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TEST]], i32 0, i32 0
// CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8
// CHECK1-NEXT: store i32 0, ptr [[LVAR]], align 4
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, ptr [[PVAR]], ptr [[LVAR]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[LVAR]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
// CHECK1-NEXT: ret i32 0
//
@@ -266,12 +277,11 @@
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[LVAR:%.*]]) #[[ATTR2]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca ptr, align 8
@@ -281,87 +291,89 @@
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[PVAR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[LVAR3:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTLINEAR_START]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START1]], align 4
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTLINEAR_START]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START1]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP8]])
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
+// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL5]] to i64
-// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[IDX_EXT]]
-// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR2]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[LVAR3]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8
-// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1
-// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR2]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[LVAR3]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1
-// CHECK1-NEXT: store i32 [[INC]], ptr [[LVAR3]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 1
+// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64
+// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[IDX_EXT]]
+// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 1
+// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[MUL4]]
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[LVAR]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[PVAR]], align 8
+// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 1
+// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[LVAR]], align 4
+// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1
+// CHECK1-NEXT: store i32 [[INC]], ptr [[LVAR]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], 1
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
-// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]])
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
+// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
// CHECK1: .omp.linear.pu:
-// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PVAR2]], align 8
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[LVAR3]], align 4
-// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR]], align 8
+// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[LVAR]], align 4
+// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4
// CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]]
// CHECK1: .omp.linear.pu.done:
// CHECK1-NEXT: ret void
@@ -417,11 +429,11 @@
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8
+// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4
@@ -430,75 +442,77 @@
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4
+// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]])
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]])
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
+// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK3: omp.inner.for.body:
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP11]], 5
-// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[MUL3]]
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[G1]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[G1]], align 4
-// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 5
-// CHECK3-NEXT: store i32 [[ADD5]], ptr [[G1]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP13]], align 8
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], 5
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[MUL2]]
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[G]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[G]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 5
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[G]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[G]], ptr [[TMP15]], align 8
// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]])
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
// CHECK3: .omp.linear.pu:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[G1]], align 4
-// CHECK3-NEXT: store i32 [[TMP17]], ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[G]], align 4
+// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP2]], align 4
// CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]]
// CHECK3: .omp.linear.pu.done:
// CHECK3-NEXT: ret void
@@ -519,18 +533,21 @@
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8
// CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8
+// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
// CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8
-// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @g)
+// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK4-NEXT: store ptr @g, ptr [[TMP0]], align 8
+// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK4-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK4-NEXT: entry:
// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8
+// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4
@@ -539,55 +556,57 @@
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32 }>, align 8
// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8
-// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4
+// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START]], align 4
// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]])
-// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1
+// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
+// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]])
+// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1
// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4: cond.true:
// CHECK4-NEXT: br label [[COND_END:%.*]]
// CHECK4: cond.false:
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
// CHECK4-NEXT: br label [[COND_END]]
// CHECK4: cond.end:
-// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
+// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
-// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK4: omp.inner.for.body:
// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
+// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK4: omp.inner.for.body:
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP11]], 5
-// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[MUL3]]
-// CHECK4-NEXT: store i32 [[ADD4]], ptr [[G1]], align 4
-// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[G1]], align 4
-// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 5
-// CHECK4-NEXT: store i32 [[ADD5]], ptr [[G1]], align 4
-// CHECK4-NEXT: store i32 1, ptr [[G1]], align 4
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], 5
+// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[MUL2]]
+// CHECK4-NEXT: store i32 [[ADD3]], ptr [[G]], align 4
+// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[G]], align 4
+// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 5
+// CHECK4-NEXT: store i32 [[ADD4]], ptr [[G]], align 4
+// CHECK4-NEXT: store i32 1, ptr [[G]], align 4
// CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0
// CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8
// CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1
@@ -599,29 +618,29 @@
// CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4
// CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8
// CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5
-// CHECK4-NEXT: [[TMP13:%.*]] = load volatile i32, ptr [[G1]], align 4
-// CHECK4-NEXT: store volatile i32 [[TMP13]], ptr [[BLOCK_CAPTURED]], align 8
-// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3
-// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
-// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]])
+// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, ptr [[G]], align 4
+// CHECK4-NEXT: store volatile i32 [[TMP15]], ptr [[BLOCK_CAPTURED]], align 8
+// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3
+// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
+// CHECK4-NEXT: call void [[TMP17]](ptr noundef [[BLOCK]])
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1
-// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1
+// CHECK4-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
-// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
-// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
-// CHECK4-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
+// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]])
+// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK4-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CHECK4-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]]
// CHECK4: .omp.linear.pu:
-// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[G1]], align 4
-// CHECK4-NEXT: store i32 [[TMP19]], ptr [[TMP0]], align 4
+// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 4
+// CHECK4-NEXT: store i32 [[TMP21]], ptr [[TMP2]], align 4
// CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]]
// CHECK4: .omp.linear.pu.done:
// CHECK4-NEXT: ret void
diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
--- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
@@ -40,283 +40,289 @@
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret i32 0
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8
// CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1
+// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
-// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
-// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1
-// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave()
-// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8
-// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16
-// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]]
+// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
+// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
+// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave()
+// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8
+// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16
+// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK1: omp.arrayinit.done:
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
-// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]]
-// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
-// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP6]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
+// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]]
+// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8
+// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP5]], align 8
// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align
8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// 
CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: 
[[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP58]], 1 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP61]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 
[[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP62]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP64]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP65]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP66]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[TMP67]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP68]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP65]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP69]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 -// CHECK1-NEXT: [[TMP74:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP73]], ptr [[TMP65]]) +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP67]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP68]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[TMP70]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP71]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP68]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP72]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP74]], ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 +// CHECK1-NEXT: [[TMP77:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP76]], ptr [[TMP68]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP75]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP77]]) -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP79]], i32 1) -// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP80]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP85]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP86]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP80]]) +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP82]], i32 1) +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP86:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP88]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP89]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP87]], [[TMP88]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP89]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP90]], [[TMP91]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ 
[[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP89]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP85]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP88]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP93:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP92]] monotonic, align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP94]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP95]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP95]] monotonic, align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] 
] +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP98]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP96:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP101:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP96]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP97]] to i32 -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP100:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP96]], i8 [[TMP99]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP101]] = extractvalue { i8, i1 } [[TMP100]], 0 -// CHECK1-NEXT: [[TMP102:%.*]] = extractvalue { i8, i1 } [[TMP100]], 1 -// CHECK1-NEXT: br i1 [[TMP102]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP99:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP104:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP99]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP99]], i8 [[TMP102]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP104]] = extractvalue { i8, i1 } [[TMP103]], 0 +// CHECK1-NEXT: [[TMP105:%.*]] = extractvalue { i8, i1 } [[TMP103]], 1 +// CHECK1-NEXT: br i1 [[TMP105]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP94]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: 
-// CHECK1-NEXT: [[TMP103:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP103]]) +// CHECK1-NEXT: [[TMP106:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP106]]) // CHECK1-NEXT: ret void // // @@ -455,20 +461,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -482,7 +488,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp --- a/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp @@ -25,17 +25,20 @@ // CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -45,56 +48,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 16) ] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 // CHECK1-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: 
br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp --- a/clang/test/OpenMP/parallel_if_codegen.cpp +++ b/clang/test/OpenMP/parallel_if_codegen.cpp @@ -70,35 +70,43 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[TMP2]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[TMP3]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z9gtid_testv() // CHECK1-NEXT: ret void // @@ -107,29 +115,32 @@ // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_1]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr @Arg, align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_1]], ptr [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_3]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR4]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_3]], ptr [[DOTBOUND_ZERO_ADDR4]], ptr [[OMP_OUTLINED_ARG_AGG_2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -139,34 +150,43 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn4v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn5v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn6v() // CHECK1-NEXT: ret void // @@ -175,29 +195,32 @@ // CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..5) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_1]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[DOTTHREADID_TEMP_1]], ptr [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_3]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR4]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[DOTTHREADID_TEMP_3]], ptr [[DOTBOUND_ZERO_ADDR4]], ptr [[OMP_OUTLINED_ARG_AGG_2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -205,34 +228,43 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn1v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn2v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn3v() // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp --- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp +++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp @@ -18,37 +18,45 @@ // CHECK-LABEL: define {{[^@]+}}@_Z3foov // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] // CHECK-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK-NEXT: ret void // // // CHECK: Function Attrs: noinline norecurse nounwind // CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: alwaysinline norecurse nounwind +// CHECK: Function Attrs: norecurse nounwind // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: ret void // // @@ -56,36 +64,44 @@ // CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov // CHECK-NOINLINE-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-NOINLINE-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NOINLINE-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOINLINE-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NOINLINE-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK-NOINLINE-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] // CHECK-NOINLINE-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK-NOINLINE-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK-NOINLINE-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-NOINLINE-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NOINLINE-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NOINLINE-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK-NOINLINE-NEXT: ret void // // // CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind // CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NOINLINE-NEXT: entry: // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOINLINE-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] // CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NOINLINE-NEXT: ret void // // -// CHECK-NOINLINE: Function Attrs: alwaysinline norecurse nounwind +// CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind // CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-NOINLINE-NEXT: entry: // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOINLINE-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] // CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NOINLINE-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_masked.cpp b/clang/test/OpenMP/parallel_masked.cpp --- a/clang/test/OpenMP/parallel_masked.cpp +++ b/clang/test/OpenMP/parallel_masked.cpp @@ -28,25 +28,29 @@ // CHECK-LABEL: define {{[^@]+}}@masked // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0) -// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP2]], i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: // CHECK-NEXT: call void (...) @foo() -// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void @@ -56,26 +60,30 @@ // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NEXT: store i32 1, ptr [[TID]], align 4 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1) -// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) +// CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: // CHECK-NEXT: call void (...) @foo() -// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void @@ -84,25 +92,29 @@ // CHECK-LABEL: define {{[^@]+}}@master // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: // CHECK-NEXT: call void (...) @foo() -// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_masked_target.cpp b/clang/test/OpenMP/parallel_masked_target.cpp --- a/clang/test/OpenMP/parallel_masked_target.cpp +++ b/clang/test/OpenMP/parallel_masked_target.cpp @@ -31,25 +31,29 @@ // CHECK-LABEL: define {{[^@]+}}@masked // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 0) -// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP2]], i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: // CHECK-NEXT: call void (...) @foo() -// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void @@ -59,26 +63,30 @@ // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TID:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NEXT: store i32 1, ptr [[TID]], align 4 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP1]], i32 1) -// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) +// CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: // CHECK-NEXT: call void (...) @foo() -// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: call void @__kmpc_end_masked(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void @@ -87,25 +95,29 @@ // CHECK-LABEL: define {{[^@]+}}@master // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: // CHECK-NEXT: call void (...) @foo() -// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -296,35 +296,39 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_masterv // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -339,28 +343,32 @@ // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK5-NEXT: store i32 [[INC]], ptr [[A]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -370,30 +378,35 @@ // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[A]]) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK9-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK9-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: ret void @@ -403,33 +416,38 @@ // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 -// CHECK13-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +// CHECK13-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK13-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK13-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK13: omp_if.then: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 -// CHECK13-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK13-NEXT: store i32 [[INC]], ptr [[A]], align 4 +// CHECK13-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: br label [[OMP_IF_END]] // CHECK13: omp_if.end: // CHECK13-NEXT: ret void @@ -439,16 +457,17 @@ // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK17-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[Y_CASTED1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]]) -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ36parallel_master_default_firstprivatevE1y, align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[Y_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZN2St1yE, align 4 -// 
CHECK17-NEXT: store i32 [[TMP2]], ptr [[Y_CASTED1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[Y_CASTED1]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[A]], i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ36parallel_master_default_firstprivatevE1y, align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr @_ZN2St1yE, align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4:[0-9]+]] // CHECK17-NEXT: ret void // @@ -464,45 +483,51 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]], i64 noundef [[Y1:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[Y_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A3:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[Y1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[Y1]], ptr [[Y_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK17-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[Y1]], align 4 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK17-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK17: omp_if.then: -// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A3]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[A4]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK17-NEXT: store i32 [[ADD]], ptr [[A4]], align 4 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A3]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], ptr [[B]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[Y_ADDR]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK17-NEXT: store i32 [[INC]], ptr [[Y_ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr @_ZN2St1yE, align 4 -// CHECK17-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[INC6]], ptr @_ZN2St1yE, align 4 -// CHECK17-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[A2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[A2]], align 4 +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[INC]], ptr [[Y]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr @_ZN2St1yE, align 4 +// CHECK17-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: store i32 [[INC4]], ptr @_ZN2St1yE, align 4 +// CHECK17-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: br label [[OMP_IF_END]] // CHECK17: omp_if.end: -// CHECK17-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A3]]) #[[ATTR4]] +// CHECK17-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4]] // CHECK17-NEXT: ret void // // @@ -542,33 +567,38 @@ // CHECK21-SAME: () #[[ATTR0:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr 
[[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK21-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK21-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK21-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: store i32 [[INC]], ptr [[A]], align 4 +// CHECK21-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: ret void @@ -577,38 +607,42 @@ // CHECK25-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK25-SAME: () #[[ATTR0:[0-9]+]] { // CHECK25-NEXT: entry: -// 
CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @a, i64 4, ptr @a.cache.) -// CHECK25-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK25-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @a to i64), [[TMP3]] -// CHECK25-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @a, i64 4, ptr @a.cache.) +// CHECK25-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK25-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @a to i64), [[TMP4]] +// CHECK25-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK25: copyin.not.master: -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr @a, align 4 -// CHECK25-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr @a, align 4 +// CHECK25-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 // CHECK25-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK25: copyin.not.master.end: -// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK25-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK25-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK25-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK25-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK25-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK25: omp_if.then: -// CHECK25-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @a, i64 4, ptr @a.cache.) 
-// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK25-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK25-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 -// CHECK25-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK25-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @a, i64 4, ptr @a.cache.) +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK25-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK25-NEXT: br label [[OMP_IF_END]] // CHECK25: omp_if.end: // CHECK25-NEXT: ret void @@ -617,45 +651,50 @@ // CHECK29-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK29-SAME: () #[[ATTR0:[0-9]+]] { // CHECK29-NEXT: entry: -// CHECK29-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) -// CHECK29-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK29-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK29-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK29-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) +// CHECK29-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK29-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK29-NEXT: ret void // // // CHECK29-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK29-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK29-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK29-NEXT: entry: // CHECK29-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK29-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK29-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK29-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK29-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK29-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK29-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) -// CHECK29-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK29-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK29-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK29-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK29-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK29-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK29-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK29-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK29-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) +// CHECK29-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK29-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// 
CHECK29-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK29-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK29: copyin.not.master: -// CHECK29-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK29-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +// CHECK29-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK29-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 4 // CHECK29-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK29: copyin.not.master.end: -// CHECK29-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK29-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK29-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) // CHECK29-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK29-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK29-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[TMP9]]) -// CHECK29-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK29-NEXT: br i1 [[TMP11]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK29-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK29-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK29-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK29-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[TMP11]]) +// CHECK29-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK29-NEXT: br i1 [[TMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK29: omp_if.then: -// CHECK29-NEXT: [[TMP12:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) -// CHECK29-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK29-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 -// CHECK29-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK29-NEXT: [[TMP14:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) +// CHECK29-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK29-NEXT: store i32 [[INC]], ptr [[TMP14]], align 4 +// CHECK29-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[TMP11]]) // CHECK29-NEXT: br label [[OMP_IF_END]] // CHECK29: omp_if.end: // CHECK29-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -40,238 +40,244 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = 
getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, 
ptr [[TMP30]], i64 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP52]]) -// CHECK1-NEXT: 
[[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 -// CHECK1-NEXT: br i1 [[TMP54]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP55]]) +// CHECK1-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 +// CHECK1-NEXT: br i1 [[TMP57]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP60]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP61]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP62]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP63]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP64]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP61]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP65]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP67]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP69]], ptr [[TMP61]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP52]]) -// CHECK1-NEXT: br label [[OMP_IF_END]] -// CHECK1: omp_if.end: +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = 
call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP63]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP64]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP65]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[TMP66]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP67]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP64]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP68]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP70]], ptr [[TMP69]], align 8 // CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP72]], i32 0) -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP76:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP76]], ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 -// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP78]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP79]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP73:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP72]], ptr [[TMP64]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP55]]) +// CHECK1-NEXT: br label [[OMP_IF_END]] +// CHECK1: omp_if.end: +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP75]], i32 0) +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP82]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP80]], [[TMP81]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP83]], [[TMP84]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP83]] to i32 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP86]] to i32 +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP87]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: 
omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP78]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP85]] monotonic, align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP88]] monotonic, align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP89:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP94:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP89]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// 
CHECK1-NEXT: [[TMP93:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP89]], i8 [[TMP92]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP94]] = extractvalue { i8, i1 } [[TMP93]], 0 -// CHECK1-NEXT: [[TMP95:%.*]] = extractvalue { i8, i1 } [[TMP93]], 1 -// CHECK1-NEXT: br i1 [[TMP95]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP92:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP97:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP92]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP92]], i8 [[TMP95]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP97]] = extractvalue { i8, i1 } [[TMP96]], 0 +// CHECK1-NEXT: [[TMP98:%.*]] = extractvalue { i8, i1 } [[TMP96]], 1 +// CHECK1-NEXT: br i1 [[TMP98]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP96:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP96]]) +// CHECK1-NEXT: [[TMP99:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP99]]) // CHECK1-NEXT: ret void // // @@ -410,20 +416,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] 
= call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -437,7 +443,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -57,100 +57,109 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_9:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store 
i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP15]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..8) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_9]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 
[[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -231,37 +240,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// 
CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -289,7 +303,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -342,92 +356,99 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP11]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// 
CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP23]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[SUB12:%.*]] = sub i32 [[SUB11]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 80, i64 16, ptr @.omp_task_entry..7) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP20]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP22]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP23]] to i1 -// CHECK1-NEXT: [[TMP24:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP19]], i32 [[TMP24]], ptr [[TMP25]], ptr [[TMP26]], i64 [[TMP30]], i32 1, i32 2, i64 [[TMP31]], ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr 
@[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK1-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 1, i64 80, i64 16, ptr @.omp_task_entry..7) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP26]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP27]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP29]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP30]] to i1 +// CHECK1-NEXT: [[TMP31:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: store i64 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP36]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP26]], i32 [[TMP31]], ptr [[TMP32]], ptr [[TMP33]], i64 [[TMP37]], i32 1, i32 2, i64 [[TMP38]], ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -462,7 +483,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, 
i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -499,7 +520,7 @@ // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP18]], align 8 @@ -539,7 +560,7 @@ // CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !40 // CHECK1-NEXT: store i64 [[TMP39]], ptr [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -588,35 +609,38 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..10) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP5]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP4]], i32 1, ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP10]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..10) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -645,7 +669,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -743,7 +767,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -751,77 +775,86 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..11, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..13) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr [[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..13) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 
1, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -856,7 +889,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -889,7 +922,7 @@ // CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !64 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 @@ -907,7 +940,7 @@ // CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !64 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: diff --git a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp @@ -200,7 +200,7 @@ // CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 -// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK-NEXT: call 
void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) @@ -212,19 +212,24 @@ // CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP5]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK: arraydestroy.body: -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -232,8 +237,8 @@ // CHECK: arraydestroy.done1: // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK-NEXT: ret i32 [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -276,88 +281,89 @@ // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP7]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP8]], align 8 -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates* -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 -// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 1 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 0 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP16]], i32 0, i32 0 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP6]], [2 x %struct.S]** [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP14]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates* +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 16, i1 false) +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP16]], i32 0, i32 1 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP22]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP24]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP23]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done1: -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 1 -// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP19]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP2]], double noundef 0.000000e+00) -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 2 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 3 -// CHECK-NEXT: 
[[TMP23:%.*]] = bitcast [2 x i32]* [[TMP22]] to i8* -// CHECK-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 8, i1 false) -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 4 -// CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 -// CHECK-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** -// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 -// CHECK-NEXT: store i64 0, i64* [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 -// CHECK-NEXT: store i64 9, i64* [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 -// CHECK-NEXT: store i64 1, i64* [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 -// CHECK-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) -// CHECK-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 -// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. 
to i8*)) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP25]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP8]], double noundef 0.000000e+00) +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 2 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 3 +// CHECK-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP28]] to i8* +// CHECK-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 4 +// CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 3 +// CHECK-NEXT: [[TMP34:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP33]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP34]], align 8 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP36]], align 8 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 9 +// CHECK-NEXT: [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP39]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP40:%.*]] = load i64, i64* [[TMP37]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP15]], i32 1, i64* [[TMP35]], i64* [[TMP36]], i64 [[TMP40]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. 
to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void @@ -410,7 +416,7 @@ // CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 @@ -428,7 +434,7 @@ // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -457,8 +463,8 @@ // CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @@ -512,9 +518,9 @@ // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 -// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP9]], 
i32 0, i32 0 // CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 // CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP12]] to %struct.S* @@ -531,7 +537,7 @@ // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done3: // CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP9]], i32 0, i32 1 // CHECK-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP16]], align 8 // CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP15]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP17]], double noundef 0.000000e+00) // CHECK-NEXT: ret void @@ -578,44 +584,49 @@ // CHECK-SAME: () #[[ATTR9:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) // CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK-NEXT: [[TMP1:%.*]] = load i32, 
i32* [[T_VAR]], align 128 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP5]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK: arraydestroy.body: -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK: arraydestroy.done1: -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK-NEXT: ret i32 [[TMP4]] +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -673,160 +684,161 @@ // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: call void 
@_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 8 // CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 -// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 
dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], 
align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP7]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP8]], align 8 -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) -// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.2* -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 -// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 128 -// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 2 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 0 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 128 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 1 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast [2 x i32]* [[TMP18]] to i8* -// CHECK-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 8, i1 false) -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 2 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP21]], i32 0, i32 0 -// CHECK-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP23]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S.1]* [[TMP6]], [2 x %struct.S.1]** [[TMP13]], 
align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP14]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.4* +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 128 +// CHECK-NEXT: [[TMP20:%.*]] = bitcast %struct.anon.3* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 16, i1 false) +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP16]], i32 0, i32 2 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 128 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 1 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP24]] to i8* +// CHECK-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 2 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP27]], i32 0, i32 0 +// CHECK-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP29]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP28]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done1: -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 3 -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP24]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 0) -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP25]] to i32 (i32, i8*)** -// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP26]], align 8 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 -// CHECK-NEXT: store i64 0, i64* [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 -// CHECK-NEXT: store i64 9, i64* [[TMP28]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 -// CHECK-NEXT: store i64 1, i64* [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 -// CHECK-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 8, i1 false) -// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[TMP29]], align 8 -// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP27]], i64* [[TMP28]], i64 [[TMP32]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*)) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP30]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP8]], i32 
noundef 0) +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 3 +// CHECK-NEXT: [[TMP32:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP31]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP33]], align 8 +// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP34]], align 16 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 9 +// CHECK-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP37]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP38:%.*]] = load i64, i64* [[TMP35]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP15]], i32 1, i64* [[TMP33]], i64* [[TMP34]], i64 [[TMP38]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4*, i32)* @.omp_task_dup..6 to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map..4 -// CHECK-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { +// CHECK-SAME: (%struct..kmp_privates.t.5* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.1]** noalias noundef [[TMP3:%.*]], %struct.S.1** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.5*, align 8 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8 -// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8 -// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8 -// CHECK-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.1]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.1**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t.5* [[TMP0]], %struct..kmp_privates.t.5** [[DOTADDR]], align 8 // CHECK-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 // CHECK-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8 -// CHECK-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK-NEXT: 
[[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S.1]** [[TMP3]], [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store %struct.S.1** [[TMP4]], %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.5*, %struct..kmp_privates.t.5** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 1 // CHECK-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8 // CHECK-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 2 -// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3 -// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.1]**, [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S.1]* [[TMP10]], [2 x %struct.S.1]** [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 3 +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.1**, %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP13]], align 8 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_entry..5 -// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -838,27 +850,27 @@ // CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.3*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8 -// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// 
CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 0 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* -// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.3* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.5* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.4* [[TMP3]] to i8* // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -877,23 +889,23 @@ // CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28 // CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 -// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void 
(%struct..kmp_privates.t.5*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 // CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 -// CHECK-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 -// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: store %struct.anon.3* [[TMP8]], %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 -// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* -// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.1]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.1** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] // CHECK-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: [[TMP27:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !28 -// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !28 -// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !28 +// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.1*, %struct.S.1** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !28 // CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32 // CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 @@ -910,9 +922,9 @@ // CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP26]], align 128 // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP27]], i64 0, i64 0 // CHECK-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX_I]], align 4 -// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP28]], i64 0, i64 0 -// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* -// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* 
[[TMP29]] to i8* +// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5_I]] to i8* +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.1* [[TMP29]] to i8* // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) // CHECK-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP37]], 1 @@ -923,66 +935,66 @@ // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_dup..6 -// CHECK-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-SAME: (%struct.kmp_task_t_with_privates.4* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP0]], %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP4]], i32 0, i32 0 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 2 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP9]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP11]], align 
8 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP10]], i32 0, i32 0 -// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP12]] to %struct.S.0* -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.3* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP8]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP11]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.1]* [[TMP12]] to %struct.S.1* +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP14]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done3: -// 
CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 3 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP9]], i32 0, i32 1 -// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP16]], align 8 -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP8]], i32 0, i32 3 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP16]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor..7 -// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP2]], i32 0, i32 2 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 
dereferenceable(4) [[TMP5]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK: arraydestroy.body: -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK: arraydestroy.done2: // CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 @@ -990,39 +1002,39 @@ // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** 
[[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK-NEXT: store i32 0, i32* [[F]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 8 // CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 8 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] @@ -1031,25 +1043,25 @@ // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load 
i32, i32* [[A_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: ret void // // @@ -1064,44 +1076,47 @@ // // // LAMBDA-LABEL: define {{[^@]+}}@.omp_outlined. -// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // LAMBDA-NEXT: entry: // LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // LAMBDA-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// LAMBDA-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // LAMBDA-NEXT: [[TMP:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // LAMBDA-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// LAMBDA-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// LAMBDA-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// LAMBDA-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// LAMBDA-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// LAMBDA-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// LAMBDA-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// LAMBDA-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// LAMBDA-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// LAMBDA-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// LAMBDA-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// LAMBDA-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// LAMBDA-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // LAMBDA: omp_if.then: -// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// LAMBDA-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) -// LAMBDA-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* -// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 -// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 -// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 -// LAMBDA-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 -// LAMBDA-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 -// LAMBDA-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 -// LAMBDA-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// LAMBDA-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 -// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 -// LAMBDA-NEXT: store i64 0, i64* [[TMP12]], align 8 -// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 -// LAMBDA-NEXT: store i64 9, i64* [[TMP13]], align 8 -// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 -// LAMBDA-NEXT: store i64 1, i64* [[TMP14]], align 8 -// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 -// LAMBDA-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* -// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) -// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 -// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) -// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// LAMBDA-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// LAMBDA-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* +// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP10:%.*]] = load volatile double, double* @g, align 8 +// LAMBDA-NEXT: store volatile double [[TMP10]], double* [[TMP9]], align 8 +// LAMBDA-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP12:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// LAMBDA-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 8 +// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 +// LAMBDA-NEXT: store i64 0, i64* [[TMP13]], align 8 +// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 +// LAMBDA-NEXT: store i64 9, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 +// LAMBDA-NEXT: store i64 1, i64* [[TMP15]], align 8 +// LAMBDA-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 +// LAMBDA-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// LAMBDA-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP5]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // LAMBDA-NEXT: br label [[OMP_IF_END]] // LAMBDA: omp_if.end: // LAMBDA-NEXT: ret void @@ -1139,12 +1154,12 @@ // LAMBDA-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // LAMBDA-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // LAMBDA-NEXT: [[I_I:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // LAMBDA-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -1155,7 +1170,7 @@ // LAMBDA-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // LAMBDA-NEXT: 
[[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // LAMBDA-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // LAMBDA-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1184,8 +1199,8 @@ // LAMBDA-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // LAMBDA-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// LAMBDA-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* @@ -1207,11 +1222,11 @@ // LAMBDA-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14 // LAMBDA-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8 // LAMBDA-NEXT: store i32 11, i32* [[TMP27]], align 4 -// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 0 // LAMBDA-NEXT: store double* [[TMP26]], double** [[TMP32]], align 8, !noalias !14 -// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 1 // LAMBDA-NEXT: store i32* [[TMP27]], i32** [[TMP33]], align 8, !noalias !14 -// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) +// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) // LAMBDA-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // LAMBDA-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1 // LAMBDA-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -1236,52 +1251,56 @@ // BLOCKS-NEXT: entry: // BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// BLOCKS-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, 
%struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // BLOCKS-NEXT: ret void // // // BLOCKS-LABEL: define {{[^@]+}}@.omp_outlined. -// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // BLOCKS-NEXT: entry: // BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // BLOCKS-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// BLOCKS-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // BLOCKS-NEXT: [[TMP:%.*]] = alloca i32, align 4 // BLOCKS-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // BLOCKS-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// BLOCKS-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// BLOCKS-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// BLOCKS-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// BLOCKS-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// BLOCKS-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// BLOCKS-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// BLOCKS-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// BLOCKS-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// BLOCKS-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// BLOCKS-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// BLOCKS-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// BLOCKS-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // BLOCKS: omp_if.then: -// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// BLOCKS-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) -// BLOCKS-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* -// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 -// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 -// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 -// BLOCKS-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 -// BLOCKS-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 -// BLOCKS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 -// BLOCKS-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// BLOCKS-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 -// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 -// BLOCKS-NEXT: store i64 0, i64* [[TMP12]], align 8 -// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 -// BLOCKS-NEXT: store i64 9, i64* [[TMP13]], align 8 -// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 -// BLOCKS-NEXT: store i64 1, i64* [[TMP14]], align 8 -// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 -// BLOCKS-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* -// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) -// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 -// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) -// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// BLOCKS-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// BLOCKS-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* +// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP10:%.*]] = load volatile double, double* @g, align 8 +// BLOCKS-NEXT: store volatile double [[TMP10]], double* [[TMP9]], align 8 +// BLOCKS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP12:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// BLOCKS-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 8 +// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 +// BLOCKS-NEXT: store i64 0, i64* [[TMP13]], align 8 +// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 +// BLOCKS-NEXT: store i64 9, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 +// BLOCKS-NEXT: store i64 1, i64* [[TMP15]], align 8 +// BLOCKS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 +// BLOCKS-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// BLOCKS-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP5]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // BLOCKS-NEXT: br label [[OMP_IF_END]] // BLOCKS: omp_if.end: // BLOCKS-NEXT: ret void @@ -1334,7 +1353,7 @@ // BLOCKS-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // BLOCKS-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // BLOCKS-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // BLOCKS-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1350,7 +1369,7 @@ // BLOCKS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // BLOCKS-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // BLOCKS-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] 
to i8* // BLOCKS-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1379,8 +1398,8 @@ // BLOCKS-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // BLOCKS-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// BLOCKS-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* @@ -1439,69 +1458,76 @@ // ARRAY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // ARRAY-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // ARRAY-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 // ARRAY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// ARRAY-NEXT: [[TMP2:%.*]] = load float*, float** [[A_ADDR]], align 8 -// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float*, %struct.St*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float* [[TMP2]], %struct.St* [[TMP3]]) +// ARRAY-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// ARRAY-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store float* [[TMP4]], float** [[TMP3]], align 8 +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// ARRAY-NEXT: [[TMP6:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[TMP6]], %struct.St** [[TMP5]], align 8 +// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // ARRAY-NEXT: ret void // // // ARRAY-LABEL: define {{[^@]+}}@.omp_outlined. 
-// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // ARRAY-NEXT: entry: // ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // ARRAY-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// ARRAY-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 -// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 -// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// ARRAY-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // ARRAY-NEXT: [[TMP:%.*]] = alloca i32, align 4 // ARRAY-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // ARRAY-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// ARRAY-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 -// ARRAY-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// ARRAY-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// ARRAY-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// ARRAY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// ARRAY-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// ARRAY-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// ARRAY-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// ARRAY-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// ARRAY-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// ARRAY-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// ARRAY-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // ARRAY: omp_if.then: -// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// ARRAY-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 -// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// ARRAY-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) -// ARRAY-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* -// ARRAY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 -// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// ARRAY-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) -// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 -// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 0 -// ARRAY-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 -// ARRAY-NEXT: store float* [[TMP14]], float** [[TMP13]], align 8 -// ARRAY-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 1 -// ARRAY-NEXT: [[TMP16:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// ARRAY-NEXT: store %struct.St* [[TMP16]], %struct.St** [[TMP15]], align 8 -// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 -// ARRAY-NEXT: store i64 0, i64* [[TMP17]], align 8 -// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 -// ARRAY-NEXT: store i64 9, i64* [[TMP18]], align 8 -// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 -// ARRAY-NEXT: store i64 1, i64* [[TMP19]], align 8 -// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 -// ARRAY-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* -// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) -// ARRAY-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 -// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* null) -// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// ARRAY-NEXT: store i64 [[TMP2]], i64* [[TMP9]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// ARRAY-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.kmp_task_t_with_privates* +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 +// ARRAY-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 8, i1 false) +// ARRAY-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP16]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 +// ARRAY-NEXT: store float* [[TMP18]], float** [[TMP17]], align 8 +// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP16]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP20:%.*]] = load %struct.St*, %struct.St** [[TMP4]], align 8 +// ARRAY-NEXT: store %struct.St* [[TMP20]], %struct.St** [[TMP19]], align 8 +// ARRAY-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 5 +// ARRAY-NEXT: store i64 0, i64* [[TMP21]], align 8 +// ARRAY-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 6 +// ARRAY-NEXT: store i64 9, i64* [[TMP22]], align 8 +// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 7 +// ARRAY-NEXT: store i64 1, i64* [[TMP23]], align 8 +// ARRAY-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i8* +// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP25]], i8 0, i64 8, i1 false) +// ARRAY-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP23]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i8* [[TMP10]], i32 1, i64* [[TMP21]], i64* [[TMP22]], i64 [[TMP26]], i32 1, i32 0, i64 0, i8* null) +// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // ARRAY-NEXT: br label [[OMP_IF_END]] // ARRAY: omp_if.end: // ARRAY-NEXT: ret void @@ -1539,7 +1565,7 @@ // ARRAY-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // ARRAY-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // ARRAY-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8 // ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8 // ARRAY-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1554,7 +1580,7 @@ // ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // ARRAY-NEXT: [[TMP7:%.*]] = load i8*, i8** 
[[TMP6]], align 8 -// ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // ARRAY-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1583,9 +1609,9 @@ // ARRAY-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // ARRAY-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // ARRAY-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// ARRAY-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// ARRAY-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // ARRAY-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // ARRAY-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // ARRAY-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 diff --git a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp @@ -207,6 +207,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) @@ -217,15 +218,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -233,8 +242,8 @@ // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -261,51 +270,50 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, 
i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP23]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -315,22 +323,22 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP17]], i32 0, i32 1 -// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP13]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
-// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP18]], i32 1, ptr [[TMP26]], ptr [[TMP27]], i64 [[TMP30]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -432,18 +440,18 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]], ptr [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr 
[[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 4 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -570,38 +578,47 @@ // CHECK1-SAME: () #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..2, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -662,74 +679,73 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: 
[[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP16]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP17]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 128 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP20]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP17]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP21]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP16]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr 
[[TMP21]], align 16 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP23]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP12]], i32 1, ptr [[TMP20]], ptr [[TMP21]], i64 [[TMP24]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP21]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP26]], align 16 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP17]], i32 1, ptr [[TMP25]], ptr [[TMP26]], i64 [[TMP29]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -749,16 +765,16 @@ // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP4]], ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 2 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 3 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP13]], align 8 // CHECK1-NEXT: ret void @@ -790,11 +806,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 128 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -825,16 +841,16 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 // 
CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !28 @@ -868,14 +884,14 @@ // CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP33]], align 128 // CHECK1-NEXT: store i32 [[TMP45]], ptr [[TMP23]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP24]], ptr align 4 [[TMP34]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP26]], i64 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[TMP26]], i64 2 // CHECK1-NEXT: br label [[OMP_ARRAYCPY_BODY_I:%.*]] // CHECK1: omp.arraycpy.body.i: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP46]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE_I]], label [[OMP_ARRAYCPY_DONE7_I:%.*]], label [[OMP_ARRAYCPY_BODY_I]] // CHECK1: omp.arraycpy.done7.i: @@ -895,23 +911,23 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 64 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP7]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP7]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds 
[[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP7]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP7]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) // CHECK1-NEXT: ret void // @@ -925,16 +941,16 @@ // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP2]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP3]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP3]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -959,7 +975,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = 
load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -972,7 +988,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -998,43 +1014,46 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1077,7 +1096,7 @@ // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 @@ -1120,7 +1139,7 @@ // CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1140,7 +1159,7 @@ // CHECK3-NEXT: store double 1.000000e+00, ptr [[TMP25]], align 8 // CHECK3-NEXT: store i32 11, ptr [[TMP26]], align 4 // CHECK3-NEXT: store ptr [[TMP25]], ptr [[REF_TMP_I]], align 8, !noalias !14 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP_I]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP_I]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 8, !noalias !14 // CHECK3-NEXT: call void 
@"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) // CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -1194,50 +1213,54 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK4-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
-// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK4-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK4-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK4-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
+// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK4-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK4-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK4-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: ret void @@ -1337,7 +1360,7 @@ // CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] // CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1416,64 +1439,71 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], ptr [[A_ADDR]], ptr [[S_ADDR]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ADDR]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK5-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP11]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP13]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP10]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 5 -// CHECK5-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 6 -// CHECK5-NEXT: store i64 9, ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 7 -// CHECK5-NEXT: store i64 1, ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 9 -// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP18]], i8 0, i64 8, i1 false) -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP10]], i32 1, ptr [[TMP15]], ptr [[TMP16]], i64 [[TMP19]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) 
+// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 +// CHECK5-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 +// CHECK5-NEXT: store i64 9, ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 +// CHECK5-NEXT: store i64 1, ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 +// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP22]], i8 0, i64 8, i1 false) +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP21]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP14]], i32 1, ptr [[TMP19]], ptr [[TMP20]], i64 [[TMP23]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -1558,9 +1588,9 @@ // CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: call void [[TMP21]](ptr [[TMP22]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1617,49 +1647,54 @@ // CHECK6-SAME: () #[[ATTR0:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]) +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK6-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK6: omp_if.then: -// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 88, i64 8, ptr @.omp_task_entry.) 
-// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK6-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK6-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK6-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 8, ptr @.omp_task_entry.) +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP8]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr [[TMP14]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr [[TMP15]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP16]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP13]], ptr [[TMP14]], i64 [[TMP17]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
+// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: // CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -60,14 +60,13 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -76,83 +75,94 @@ // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 // CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP17]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -233,37 +243,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// 
CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -291,7 +306,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -344,98 +359,104 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP21]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP23]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[SUB12:%.*]] = sub i32 [[SUB11]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP21]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP24]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP21]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK1-NEXT: [[TMP27:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 6 -// 
CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP21]], i32 [[TMP27]], ptr [[TMP28]], ptr [[TMP29]], i64 [[TMP33]], i32 1, i32 2, i64 [[TMP34]], ptr @.omp_task_dup.) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK1-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP29]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP30]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP32]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP29]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP34]] to i1 +// CHECK1-NEXT: [[TMP35:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: store i64 [[TMP38]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP40]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP29]], i32 [[TMP35]], ptr 
[[TMP36]], ptr [[TMP37]], i64 [[TMP41]], i32 1, i32 2, i64 [[TMP42]], ptr @.omp_task_dup.) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -485,11 +506,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -522,24 +543,24 @@ // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 // CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 // CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP35]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 
[[IDXPROM_I]] // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 // CHECK1-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP39]] to i64 @@ -571,9 +592,9 @@ // CHECK1: taskloop.if.then.i: // CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -637,11 +658,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: ret void // // @@ -671,7 +692,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -679,77 +700,86 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// 
CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..8, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..10) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr [[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..10) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -784,7 +814,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -817,7 +847,7 @@ // CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !62 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 @@ -835,7 +865,7 @@ // CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -872,14 +902,13 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -888,83 +917,94 @@ // CHECK2-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK2-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK2-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK2-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 // CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK2-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK2-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK2-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK2-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK2-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK2-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK2-NEXT: ret i32 [[TMP14]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK2-NEXT: ret i32 [[TMP17]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) 
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK2-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], i32 1, i32 0, i64 0, ptr null) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK2-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1045,37 +1085,42 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK2-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK2-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1103,7 +1148,7 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -1156,98 +1201,104 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// 
CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[AGG_CAPTURED]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP21]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP23]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// 
CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: [[SUB12:%.*]] = sub i32 [[SUB11]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK2-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP21]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP22]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP24]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP21]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK2-NEXT: [[TMP27:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 6 -// CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP34:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP21]], i32 [[TMP27]], ptr [[TMP28]], ptr [[TMP29]], i64 [[TMP33]], i32 1, i32 2, i64 [[TMP34]], ptr @.omp_task_dup.) 
-// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK2-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK2-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK2-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP29]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP30]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP32]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP29]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP34]] to i1 +// CHECK2-NEXT: [[TMP35:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: store i64 [[TMP38]], ptr [[TMP37]], align 8 +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP40]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP41:%.*]] = load i64, ptr [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP42:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP29]], i32 [[TMP35]], ptr [[TMP36]], ptr [[TMP37]], i64 [[TMP41]], i32 1, i32 2, i64 [[TMP42]], ptr @.omp_task_dup.) 
+// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1297,11 +1348,11 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -1334,24 +1385,24 @@ // CHECK2-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 // CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 // CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 // CHECK2-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 // CHECK2-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP35]] to i64 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 [[IDXPROM_I]] // CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8 -// 
CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 // CHECK2-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP39]] to i64 @@ -1383,9 +1434,9 @@ // CHECK2: taskloop.if.then.i: // CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8 -// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: @@ -1449,11 +1500,11 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: ret void // // @@ -1483,7 +1534,7 @@ // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1491,77 +1542,86 @@ // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK2-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK2-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..8, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK2-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK2-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK2-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK2-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK2-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..10) -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr 
[[TMP17]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr [[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..10) +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1596,7 +1656,7 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// 
CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -1629,7 +1689,7 @@ // CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK2-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !62 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 @@ -1647,7 +1707,7 @@ // CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK2-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP18]], i32 0, i32 1 // CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: @@ -1684,14 +1744,13 @@ // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -1700,83 +1759,94 @@ // CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK3-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 // CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK3-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK3-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: ret i32 [[TMP14]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: ret i32 [[TMP17]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) 
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], i32 1, i32 0, i64 0, ptr null) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1857,37 +1927,42 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1915,7 +1990,7 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -1968,103 +2043,109 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -// CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_4]], align 4 
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP15]], i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 [[IDXPROM7]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP19]] to i32 -// CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3: omp_if.then: +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP23]], i64 [[IDXPROM]] +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to 
i64 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 [[IDXPROM8]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[SUB11:%.*]] = sub i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: [[SUB12:%.*]] = sub i32 [[SUB11]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK3-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK3-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK3-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 32, ptr @.omp_task_entry..7) -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP23]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP24]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP26]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP23]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP28]] to i1 -// CHECK3-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL16]] to i32 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP30]], align 8 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 6 -// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK3-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP33]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP34]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[TMP33]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP23]], i32 [[TMP29]], ptr [[TMP30]], ptr [[TMP31]], i64 [[TMP35]], i32 1, i32 2, i64 [[TMP36]], ptr @.omp_task_dup.) 
-// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[CONV11:%.*]] = sext i32 [[DIV]] to i64 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK3-NEXT: [[SUB12:%.*]] = sub i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[SUB13:%.*]] = sub i32 [[SUB12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB13]], 1 +// CHECK3-NEXT: [[DIV14:%.*]] = udiv i32 [[ADD]], 1 +// CHECK3-NEXT: [[CONV15:%.*]] = zext i32 [[DIV14]] to i64 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV11]], [[CONV15]] +// CHECK3-NEXT: [[SUB16:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 32, ptr @.omp_task_entry..7) +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP31]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP32]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP34]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP31]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP36:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL17:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK3-NEXT: [[TMP37:%.*]] = sext i1 [[TOBOOL17]] to i32 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP38]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK3-NEXT: store i64 [[TMP40]], ptr [[TMP39]], align 8 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP42]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP43:%.*]] = load i64, ptr [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP31]], i32 [[TMP37]], ptr [[TMP38]], ptr [[TMP39]], i64 [[TMP43]], i32 1, i32 2, i64 [[TMP44]], ptr @.omp_task_dup.) 
+// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -2114,11 +2195,11 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -2151,24 +2232,24 @@ // CHECK3-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 // CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 // CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 // CHECK3-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP35]] to i64 // CHECK3-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 [[IDXPROM_I]] // CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8 -// 
CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 // CHECK3-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP39]] to i64 @@ -2200,11 +2281,11 @@ // CHECK3: taskloop.if.then.i: // CHECK3-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK3-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 3 // CHECK3-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1 // CHECK3-NEXT: [[TOBOOL_I:%.*]] = trunc i8 [[TMP54]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL_I]], label [[OMP_IF_THEN_I:%.*]], label [[OMP_IF_ELSE_I:%.*]] @@ -2316,11 +2397,11 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: ret void // // @@ -2350,7 +2431,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2358,77 +2439,86 @@ // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..8, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK3-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK3-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK3-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK3-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK3-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..10) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// 
CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK3-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr [[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..10) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK3-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -2463,7 +2553,7 @@ // 
CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -2496,7 +2586,7 @@ // CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK3-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !64 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 @@ -2514,7 +2604,7 @@ // CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK3-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !64 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP18]], i32 0, i32 1 // CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp @@ -200,7 +200,7 @@ // CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 -// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) @@ -212,19 +212,24 @@ // CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 // CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* 
[[T_VAR_CASTED]] to i32* -// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[TMP5]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 // CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK: arraydestroy.body: -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -232,8 +237,8 @@ // CHECK: arraydestroy.done1: // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK-NEXT: ret i32 [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -276,88 +281,89 @@ // // // CHECK-LABEL: 
define {{[^@]+}}@.omp_outlined. -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 -// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 -// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK-NEXT: [[TMP8:%.*]] = load %struct.S*, %struct.S** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP7]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP8]], align 8 -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates* -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 -// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 1 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 0 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP16]], i32 0, i32 0 -// CHECK-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP6]], [2 x %struct.S]** [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S* [[TMP8]], %struct.S** [[TMP14]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates* +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 16, i1 false) +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP16]], i32 0, i32 1 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP22]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = bitcast [2 x %struct.S]* [[TMP6]] to %struct.S* +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP24]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP23]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) // CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done1: -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 1 -// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP19]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP2]], double noundef 0.000000e+00) -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 2 -// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 3 -// CHECK-NEXT: 
[[TMP23:%.*]] = bitcast [2 x i32]* [[TMP22]] to i8* -// CHECK-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 8, i1 false) -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 4 -// CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 -// CHECK-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** -// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 -// CHECK-NEXT: store i64 0, i64* [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 -// CHECK-NEXT: store i64 9, i64* [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 -// CHECK-NEXT: store i64 1, i64* [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 -// CHECK-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) -// CHECK-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 -// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. 
to i8*)) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP25]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP8]], double noundef 0.000000e+00) +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 2 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: store i32 [[TMP27]], i32* [[TMP26]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 3 +// CHECK-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP28]] to i8* +// CHECK-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP21]], i32 0, i32 4 +// CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK-NEXT: store i32 [[TMP32]], i32* [[TMP31]], align 4 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 3 +// CHECK-NEXT: [[TMP34:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP33]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP34]], align 8 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP36]], align 8 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 9 +// CHECK-NEXT: [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP39]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP40:%.*]] = load i64, i64* [[TMP37]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP15]], i32 1, i64* [[TMP35]], i64* [[TMP36]], i64 [[TMP40]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. 
to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void @@ -410,7 +416,7 @@ // CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 @@ -428,7 +434,7 @@ // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -457,8 +463,8 @@ // CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// CHECK-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @@ -512,9 +518,9 @@ // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 -// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP9]], 
i32 0, i32 0 // CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 // CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 // CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP12]] to %struct.S* @@ -531,7 +537,7 @@ // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done3: // CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[TMP9]], i32 0, i32 1 // CHECK-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP16]], align 8 // CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP15]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP17]], double noundef 0.000000e+00) // CHECK-NEXT: ret void @@ -578,44 +584,49 @@ // CHECK-SAME: () #[[ATTR9:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) // CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 128 // CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) -// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 -// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 -// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) -// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK-NEXT: [[TMP1:%.*]] = load i32, 
i32* [[T_VAR]], align 128 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK-NEXT: store [2 x %struct.S.1]* [[S_ARR]], [2 x %struct.S.1]** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK-NEXT: store %struct.S.1* [[VAR]], %struct.S.1** [[TMP5]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK: arraydestroy.body: -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK: arraydestroy.done1: -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK-NEXT: ret i32 [[TMP4]] +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -673,160 +684,161 @@ // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: call void 
@_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 8 // CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 -// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 
dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon.2* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 -// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.2*, align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 -// CHECK-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK-NEXT: store %struct.anon.2* [[__CONTEXT]], %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load [2 x i32]*, [2 x i32]** [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 8 +// CHECK-NEXT: store i32 [[TMP4]], i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP5]], 
align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP0]], i32 0, i32 3 +// CHECK-NEXT: [[TMP8:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK: omp_if.then: -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP7]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP8]], align 8 -// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) -// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.2* -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 -// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 128 -// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 2 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 0 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 -// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 128 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 1 -// CHECK-NEXT: [[TMP19:%.*]] = bitcast [2 x i32]* [[TMP18]] to i8* -// CHECK-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 8, i1 false) -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 2 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP21]], i32 0, i32 0 -// CHECK-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP23]] +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S.1]* [[TMP6]], [2 x %struct.S.1]** [[TMP13]], 
align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S.1* [[TMP8]], %struct.S.1** [[TMP14]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to %struct.kmp_task_t_with_privates.4* +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 128 +// CHECK-NEXT: [[TMP20:%.*]] = bitcast %struct.anon.3* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 16, i1 false) +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP16]], i32 0, i32 2 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: store i32 [[TMP23]], i32* [[TMP22]], align 128 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 1 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP24]] to i8* +// CHECK-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 2 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP27]], i32 0, i32 0 +// CHECK-NEXT: [[TMP28:%.*]] = bitcast [2 x %struct.S.1]* [[TMP6]] to %struct.S.1* +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP29]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP28]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done1: -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 3 -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP24]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 0) -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 -// CHECK-NEXT: [[TMP26:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP25]] to i32 (i32, i8*)** -// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP26]], align 8 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 -// CHECK-NEXT: store i64 0, i64* [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 -// CHECK-NEXT: store i64 9, i64* [[TMP28]], align 16 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 -// CHECK-NEXT: store i64 1, i64* [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 -// CHECK-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i8* -// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 8, i1 false) -// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[TMP29]], align 8 -// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP27]], i64* [[TMP28]], i64 [[TMP32]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*)) -// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP21]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP30]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP8]], i32 
noundef 0) +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 3 +// CHECK-NEXT: [[TMP32:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP31]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.4*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP33]], align 8 +// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP34]], align 16 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP17]], i32 0, i32 9 +// CHECK-NEXT: [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP37]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP38:%.*]] = load i64, i64* [[TMP35]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i8* [[TMP15]], i32 1, i64* [[TMP33]], i64* [[TMP34]], i64 [[TMP38]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4*, i32)* @.omp_task_dup..6 to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) // CHECK-NEXT: br label [[OMP_IF_END]] // CHECK: omp_if.end: // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map..4 -// CHECK-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { +// CHECK-SAME: (%struct..kmp_privates.t.5* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.1]** noalias noundef [[TMP3:%.*]], %struct.S.1** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.5*, align 8 // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8 -// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8 -// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8 -// CHECK-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.1]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.1**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t.5* [[TMP0]], %struct..kmp_privates.t.5** [[DOTADDR]], align 8 // CHECK-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 // CHECK-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8 -// CHECK-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK-NEXT: 
[[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S.1]** [[TMP3]], [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store %struct.S.1** [[TMP4]], %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.5*, %struct..kmp_privates.t.5** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 // CHECK-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 1 // CHECK-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8 // CHECK-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 2 -// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8 -// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3 -// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8 -// CHECK-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.1]**, [2 x %struct.S.1]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S.1]* [[TMP10]], [2 x %struct.S.1]** [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP5]], i32 0, i32 3 +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.1**, %struct.S.1*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store %struct.S.1* [[TMP12]], %struct.S.1** [[TMP13]], align 8 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_entry..5 -// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 @@ -838,27 +850,27 @@ // CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.3*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 // CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8 -// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8 -// 
CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.1]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 0 // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 -// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8* -// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.3* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.5* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.4* [[TMP3]] to i8* // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 // CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 @@ -877,23 +889,23 @@ // CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 // CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 -// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void 
(%struct..kmp_privates.t.5*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32 // CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !32 -// CHECK-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 -// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: store %struct.anon.3* [[TMP8]], %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.3*, %struct.anon.3** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 -// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* -// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.1]**, %struct.S.1**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.1]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.1** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] // CHECK-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: [[TMP27:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !32 -// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !32 -// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !32 +// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.1*, %struct.S.1** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !32 // CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 // CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32 // CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32 @@ -910,9 +922,9 @@ // CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP26]], align 128, !llvm.access.group [[ACC_GRP33]] // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP27]], i64 0, i64 0 // CHECK-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP28]], i64 0, i64 0 -// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] 
to i8* -// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP29]] to i8* +// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.1* [[ARRAYIDX5_I]] to i8* +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.1* [[TMP29]] to i8* // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false), !llvm.access.group [[ACC_GRP33]] // CHECK-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]] // CHECK-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP37]], 1 @@ -923,66 +935,66 @@ // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_dup..6 -// CHECK-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-SAME: (%struct.kmp_task_t_with_privates.4* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP0]], %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 // CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP4]], i32 0, i32 0 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2 -// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 2 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], 
%struct.anon.1* [[TMP9]], i32 0, i32 0 -// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP11]], align 8 -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP10]], i32 0, i32 0 -// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP12]] to %struct.S.0* -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 -// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], %struct.kmp_task_t_with_privates.4* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.3* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP8]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], %struct.anon.3* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.1]*, [2 x %struct.S.1]** [[TMP11]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.1]* [[TMP12]] to %struct.S.1* +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.1* [[ARRAY_BEGIN]], [[TMP14]] // CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK: omp.arraycpy.body: -// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) -// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.1* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], %struct.S.1* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.1* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // 
CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK: omp.arraycpy.done3: -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 3 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP9]], i32 0, i32 1 -// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP16]], align 8 -// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], %struct..kmp_privates.t.5* [[TMP8]], i32 0, i32 3 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], %struct.anon.3* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.1*, %struct.S.1** [[TMP16]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor..7 -// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.4* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.4*, align 8 // CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 -// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates.4* [[TMP1]], %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.4*, %struct.kmp_task_t_with_privates.4** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], %struct.kmp_task_t_with_privates.4* [[TMP2]], i32 0, i32 2 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], %struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 2 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], 
%struct..kmp_privates.t.5* [[TMP3]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], [2 x %struct.S.1]* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[ARRAY_BEGIN]], i64 2 // CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK: arraydestroy.body: -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 -// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.1* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.1* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK: arraydestroy.done2: // CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 @@ -990,39 +1002,39 @@ // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store 
%struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK-NEXT: store i32 0, i32* [[F]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.1* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[S]], %struct.S.1** [[S_ADDR]], align 8 // CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 -// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.1*, %struct.S.1** [[S_ADDR]], align 8 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], %struct.S.1* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] @@ -1031,25 +1043,25 @@ // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: 
[[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], %struct.S.1* [[THIS1]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev -// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-SAME: (%struct.S.1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 -// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 -// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.1*, align 8 +// CHECK-NEXT: store %struct.S.1* [[THIS]], %struct.S.1** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.1*, %struct.S.1** [[THIS_ADDR]], align 8 // CHECK-NEXT: ret void // // @@ -1064,44 +1076,47 @@ // // // LAMBDA-LABEL: define {{[^@]+}}@.omp_outlined. -// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // LAMBDA-NEXT: entry: // LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // LAMBDA-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// LAMBDA-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // LAMBDA-NEXT: [[TMP:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // LAMBDA-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// LAMBDA-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// LAMBDA-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// LAMBDA-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// LAMBDA-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// LAMBDA-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// LAMBDA-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// LAMBDA-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// LAMBDA-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// LAMBDA-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// LAMBDA-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// LAMBDA-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// LAMBDA-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // LAMBDA: omp_if.then: -// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// LAMBDA-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) -// LAMBDA-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* -// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 -// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 -// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 -// LAMBDA-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 -// LAMBDA-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 -// LAMBDA-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 -// LAMBDA-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// LAMBDA-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 -// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 -// LAMBDA-NEXT: store i64 0, i64* [[TMP12]], align 8 -// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 -// LAMBDA-NEXT: store i64 9, i64* [[TMP13]], align 8 -// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 -// LAMBDA-NEXT: store i64 1, i64* [[TMP14]], align 8 -// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 -// LAMBDA-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* -// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) -// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 -// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) -// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// LAMBDA-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// LAMBDA-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* +// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP10:%.*]] = load volatile double, double* @g, align 8 +// LAMBDA-NEXT: store volatile double [[TMP10]], double* [[TMP9]], align 8 +// LAMBDA-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP12:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// LAMBDA-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 8 +// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 +// LAMBDA-NEXT: store i64 0, i64* [[TMP13]], align 8 +// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 +// LAMBDA-NEXT: store i64 9, i64* [[TMP14]], align 8 +// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 +// LAMBDA-NEXT: store i64 1, i64* [[TMP15]], align 8 +// LAMBDA-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 +// LAMBDA-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// LAMBDA-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP5]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // LAMBDA-NEXT: br label [[OMP_IF_END]] // LAMBDA: omp_if.end: // LAMBDA-NEXT: ret void @@ -1139,12 +1154,12 @@ // LAMBDA-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // LAMBDA-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // LAMBDA-NEXT: [[I_I:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 // LAMBDA-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 @@ -1155,7 +1170,7 @@ // LAMBDA-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // LAMBDA-NEXT: 
[[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // LAMBDA-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // LAMBDA-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1184,8 +1199,8 @@ // LAMBDA-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // LAMBDA-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// LAMBDA-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // LAMBDA-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* @@ -1207,11 +1222,11 @@ // LAMBDA-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] // LAMBDA-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] // LAMBDA-NEXT: store i32 11, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] -// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 +// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 0 // LAMBDA-NEXT: store double* [[TMP26]], double** [[TMP32]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] -// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1 +// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP_I]], i32 0, i32 1 // LAMBDA-NEXT: store i32* [[TMP27]], i32** [[TMP33]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] -// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]), !llvm.access.group [[ACC_GRP15]] +// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.1* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]), !llvm.access.group [[ACC_GRP15]] // LAMBDA-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] // LAMBDA-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1 // LAMBDA-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] @@ -1236,52 +1251,56 @@ // BLOCKS-NEXT: entry: // BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 // BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// BLOCKS-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // BLOCKS-NEXT: store i8* 
[[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // BLOCKS-NEXT: ret void // // // BLOCKS-LABEL: define {{[^@]+}}@.omp_outlined. -// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // BLOCKS-NEXT: entry: // BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // BLOCKS-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// BLOCKS-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // BLOCKS-NEXT: [[TMP:%.*]] = alloca i32, align 4 // BLOCKS-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // BLOCKS-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// BLOCKS-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// BLOCKS-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// BLOCKS-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// BLOCKS-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// BLOCKS-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// BLOCKS-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// BLOCKS-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// BLOCKS-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// BLOCKS-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// BLOCKS-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// BLOCKS-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// BLOCKS-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // BLOCKS: omp_if.then: -// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// BLOCKS-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) -// BLOCKS-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates* -// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0 -// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1 -// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0 -// BLOCKS-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8 -// BLOCKS-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8 -// BLOCKS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1 -// BLOCKS-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 -// BLOCKS-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8 -// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5 -// BLOCKS-NEXT: store i64 0, i64* [[TMP12]], align 8 -// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6 -// BLOCKS-NEXT: store i64 9, i64* [[TMP13]], align 8 -// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7 -// BLOCKS-NEXT: store i64 1, i64* [[TMP14]], align 8 -// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9 -// BLOCKS-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8* -// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false) -// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8 -// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null) -// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// BLOCKS-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// BLOCKS-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* +// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP10:%.*]] = load volatile double, double* @g, align 8 +// BLOCKS-NEXT: store volatile double [[TMP10]], double* [[TMP9]], align 8 +// BLOCKS-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP12:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// BLOCKS-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 8 +// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 5 +// BLOCKS-NEXT: store i64 0, i64* [[TMP13]], align 8 +// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 6 +// BLOCKS-NEXT: store i64 9, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 7 +// BLOCKS-NEXT: store i64 1, i64* [[TMP15]], align 8 +// BLOCKS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP7]], i32 0, i32 9 +// BLOCKS-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i8* +// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP17]], i8 0, i64 8, i1 false) +// BLOCKS-NEXT: [[TMP18:%.*]] = load i64, i64* [[TMP15]], align 8 +// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP5]], i32 1, i64* [[TMP13]], i64* [[TMP14]], i64 [[TMP18]], i32 1, i32 0, i64 0, i8* null) +// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // BLOCKS-NEXT: br label [[OMP_IF_END]] // BLOCKS: omp_if.end: // BLOCKS-NEXT: ret void @@ -1334,7 +1353,7 @@ // BLOCKS-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // BLOCKS-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // BLOCKS-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8 // BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 // BLOCKS-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1350,7 +1369,7 @@ // BLOCKS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // BLOCKS-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // BLOCKS-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] 
to i8* // BLOCKS-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1379,8 +1398,8 @@ // BLOCKS-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // BLOCKS-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// BLOCKS-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // BLOCKS-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* @@ -1439,69 +1458,76 @@ // ARRAY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // ARRAY-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 // ARRAY-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 // ARRAY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// ARRAY-NEXT: [[TMP2:%.*]] = load float*, float** [[A_ADDR]], align 8 -// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float*, %struct.St*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float* [[TMP2]], %struct.St* [[TMP3]]) +// ARRAY-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// ARRAY-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP4:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store float* [[TMP4]], float** [[TMP3]], align 8 +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// ARRAY-NEXT: [[TMP6:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[TMP6]], %struct.St** [[TMP5]], align 8 +// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // ARRAY-NEXT: ret void // // // ARRAY-LABEL: define {{[^@]+}}@.omp_outlined. 
-// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], %struct.anon* noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // ARRAY-NEXT: entry: // ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // ARRAY-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// ARRAY-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 -// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 -// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// ARRAY-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 +// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // ARRAY-NEXT: [[TMP:%.*]] = alloca i32, align 4 // ARRAY-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // ARRAY-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// ARRAY-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 -// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 -// ARRAY-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// ARRAY-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// ARRAY-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// ARRAY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// ARRAY-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// ARRAY-NEXT: store %struct.anon* [[__CONTEXT]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 +// ARRAY-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP0]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP0]], i32 0, i32 2 +// ARRAY-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// ARRAY-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// ARRAY-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// ARRAY-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // ARRAY: omp_if.then: -// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// ARRAY-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 -// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// ARRAY-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) -// ARRAY-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* -// ARRAY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 -// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 -// ARRAY-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) -// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 -// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 0 -// ARRAY-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 -// ARRAY-NEXT: store float* [[TMP14]], float** [[TMP13]], align 8 -// ARRAY-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 1 -// ARRAY-NEXT: [[TMP16:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 -// ARRAY-NEXT: store %struct.St* [[TMP16]], %struct.St** [[TMP15]], align 8 -// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 -// ARRAY-NEXT: store i64 0, i64* [[TMP17]], align 8 -// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 -// ARRAY-NEXT: store i64 9, i64* [[TMP18]], align 8 -// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 -// ARRAY-NEXT: store i64 1, i64* [[TMP19]], align 8 -// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 -// ARRAY-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* -// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) -// ARRAY-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 -// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* null) -// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 +// ARRAY-NEXT: store i64 [[TMP2]], i64* [[TMP9]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// ARRAY-NEXT: [[TMP10:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.kmp_task_t_with_privates* +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 +// ARRAY-NEXT: [[TMP15:%.*]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* +// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 8, i1 false) +// ARRAY-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP11]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP16]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8 +// ARRAY-NEXT: store float* [[TMP18]], float** [[TMP17]], align 8 +// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP16]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP20:%.*]] = load %struct.St*, %struct.St** [[TMP4]], align 8 +// ARRAY-NEXT: store %struct.St* [[TMP20]], %struct.St** [[TMP19]], align 8 +// ARRAY-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 5 +// ARRAY-NEXT: store i64 0, i64* [[TMP21]], align 8 +// ARRAY-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 6 +// ARRAY-NEXT: store i64 9, i64* [[TMP22]], align 8 +// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 7 +// ARRAY-NEXT: store i64 1, i64* [[TMP23]], align 8 +// ARRAY-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP12]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP25:%.*]] = bitcast i8** [[TMP24]] to i8* +// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP25]], i8 0, i64 8, i1 false) +// ARRAY-NEXT: [[TMP26:%.*]] = load i64, i64* [[TMP23]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i8* [[TMP10]], i32 1, i64* [[TMP21]], i64* [[TMP22]], i64 [[TMP26]], i32 1, i32 0, i64 0, i8* null) +// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) // ARRAY-NEXT: br label [[OMP_IF_END]] // ARRAY: omp_if.end: // ARRAY-NEXT: ret void @@ -1539,7 +1565,7 @@ // ARRAY-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 // ARRAY-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 // ARRAY-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 -// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.0*, align 8 // ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8 // ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8 // ARRAY-NEXT: [[I_I:%.*]] = alloca i32, align 4 @@ -1554,7 +1580,7 @@ // ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // ARRAY-NEXT: [[TMP7:%.*]] = load i8*, i8** 
[[TMP6]], align 8 -// ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // ARRAY-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -1583,9 +1609,9 @@ // ARRAY-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 // ARRAY-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // ARRAY-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 -// ARRAY-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// ARRAY-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP22]], i32 0, i32 0 // ARRAY-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 // ARRAY-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // ARRAY-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp @@ -207,6 +207,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) @@ -217,15 +218,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -233,8 +242,8 @@ // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -261,51 +270,50 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, 
i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP23]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -315,22 +323,22 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP17]], i32 0, i32 1 -// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP13]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
-// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP18]], i32 1, ptr [[TMP26]], ptr [[TMP27]], i64 [[TMP30]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -432,18 +440,18 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]], ptr [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr 
[[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 4 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -570,38 +578,47 @@ // CHECK1-SAME: () #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..2, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -662,74 +679,73 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: 
[[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP16]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP17]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 128 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP20]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP17]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP21]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP16]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr 
[[TMP21]], align 16 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP23]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP12]], i32 1, ptr [[TMP20]], ptr [[TMP21]], i64 [[TMP24]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP21]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP26]], align 16 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP17]], i32 1, ptr [[TMP25]], ptr [[TMP26]], i64 [[TMP29]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -749,16 +765,16 @@ // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP4]], ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 2 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 3 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP13]], align 8 // CHECK1-NEXT: ret void @@ -790,11 +806,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 128 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -825,16 +841,16 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 // 
CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !32 @@ -868,14 +884,14 @@ // CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP33]], align 128 // CHECK1-NEXT: store i32 [[TMP45]], ptr [[TMP23]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP24]], ptr align 4 [[TMP34]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP26]], i64 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[TMP26]], i64 2 // CHECK1-NEXT: br label [[OMP_ARRAYCPY_BODY_I:%.*]] // CHECK1: omp.arraycpy.body.i: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP46]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE_I]], label [[OMP_ARRAYCPY_DONE7_I:%.*]], label [[OMP_ARRAYCPY_BODY_I]] // CHECK1: omp.arraycpy.done7.i: @@ -895,23 +911,23 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 64 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP7]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP7]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds 
[[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP7]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP7]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) // CHECK1-NEXT: ret void // @@ -925,16 +941,16 @@ // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP2]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP3]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP3]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -959,7 +975,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = 
load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -972,7 +988,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -998,43 +1014,46 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1077,7 +1096,7 @@ // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 @@ -1120,7 +1139,7 @@ // CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1140,7 +1159,7 @@ // CHECK3-NEXT: store double 1.000000e+00, ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: store i32 11, ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: store ptr [[TMP25]], ptr [[REF_TMP_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP_I]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP_I]], i32 0, i32 1 // 
CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] @@ -1194,50 +1213,54 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK4-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
-// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK4-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK4-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK4-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) 
+// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK4-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK4-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK4-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: ret void @@ -1337,7 +1360,7 @@ // CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] // CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1416,64 +1439,71 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], ptr [[A_ADDR]], ptr [[S_ADDR]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ADDR]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK5-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP11]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP13]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP10]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 5 -// CHECK5-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 6 -// CHECK5-NEXT: store i64 9, ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 7 -// CHECK5-NEXT: store i64 1, ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 9 -// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP18]], i8 0, i64 8, i1 false) -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP10]], i32 1, ptr [[TMP15]], ptr [[TMP16]], i64 [[TMP19]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) 
+// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 +// CHECK5-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 +// CHECK5-NEXT: store i64 9, ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 +// CHECK5-NEXT: store i64 1, ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 +// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP22]], i8 0, i64 8, i1 false) +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP21]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP14]], i32 1, ptr [[TMP19]], ptr [[TMP20]], i64 [[TMP23]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -1558,9 +1588,9 @@ // CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: call void [[TMP21]](ptr [[TMP22]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1618,53 +1648,59 @@ // CHECK6-NEXT: entry: // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[I]], ptr [[J]]) +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[J]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[J_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK6-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK6: omp_if.then: -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK6-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 80, i64 16, ptr @.omp_task_entry.) 
-// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 -// CHECK6-NEXT: store i64 0, ptr [[TMP12]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 -// CHECK6-NEXT: store i64 9, ptr [[TMP13]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 -// CHECK6-NEXT: store i64 1, ptr [[TMP14]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 -// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP8]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr null) -// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 80, i64 16, ptr @.omp_task_entry.) 
+// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP11]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP14]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr [[TMP16]], align 8 +// CHECK6-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr [[TMP17]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP18]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP6]], ptr [[TMP11]], i32 1, ptr [[TMP15]], ptr [[TMP16]], i64 [[TMP19]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: // CHECK6-NEXT: ret void @@ -1729,7 +1765,7 @@ // CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 // CHECK6-NEXT: store i32 [[TMP20]], ptr [[DOTLINEAR_START_I]], align 4, !noalias !14 -// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK6-NEXT: store i32 [[TMP23]], ptr [[DOTLINEAR_START1_I]], align 4, !noalias !14 @@ -1764,7 +1800,7 @@ // CHECK6-NEXT: br i1 [[TMP34]], label [[DOTOMP_LINEAR_PU_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK6: .omp.linear.pu.i: // CHECK6-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK6-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP18]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP18]], i32 0, i32 1 // CHECK6-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK6-NEXT: [[TMP38:%.*]] = load i32, ptr [[J_I]], align 4, !noalias !14 // CHECK6-NEXT: store i32 [[TMP38]], ptr [[TMP37]], align 4 diff --git a/clang/test/OpenMP/parallel_private_codegen.cpp b/clang/test/OpenMP/parallel_private_codegen.cpp --- a/clang/test/OpenMP/parallel_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_private_codegen.cpp @@ -177,6 +177,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(16) [[SS]], ptr noundef nonnull align 4 
dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -187,7 +188,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] @@ -243,10 +244,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -254,6 +256,8 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -265,18 +269,18 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: store i32 3, ptr [[SIVAR]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -299,30 +303,31 @@ // CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -337,6 +342,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -350,16 +356,18 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -367,21 +375,23 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -453,39 +463,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 128 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX1]], ptr align 128 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] @@ -509,7 +522,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -518,34 +531,39 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[DOTA__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr inttoptr (i64 2 to ptr)) -// CHECK1-NEXT: store ptr [[DOTA__VOID_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTA__VOID_ADDR]], ptr inttoptr (i64 2 to ptr)) +// CHECK1-NEXT: [[DOTA__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i64 4, ptr inttoptr (i64 2 to ptr)) +// CHECK1-NEXT: store ptr [[DOTA__VOID_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTA__VOID_ADDR]], ptr inttoptr (i64 2 to ptr)) // CHECK1-NEXT: ret void // // @@ -557,7 +575,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -602,6 +620,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -615,16 +634,18 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[THIS1]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -633,20 +654,22 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -655,6 +678,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -674,16 +698,18 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -691,40 +717,45 @@ // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw 
i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 1, ptr [[G]], align 128 // CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP2]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -759,22 +790,26 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 1, ptr [[G]], align 128 // CHECK4-NEXT: store i32 20, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 @@ -788,14 +823,14 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP1]], ptr [[BLOCK_CAPTURED]], align 128 // CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 32 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK4-NEXT: call void [[TMP3]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[BLOCK_CAPTURED1]], align 32 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: call void [[TMP4]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: ret void // // @@ -818,6 +853,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: 
store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -831,16 +867,18 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[THIS1]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -849,8 +887,10 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 @@ -864,19 +904,19 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED]], align 8 // 
CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[BLOCK_CAPTURED2]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[BLOCK_CAPTURED2]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED3]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK4-NEXT: call void [[TMP5]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[BLOCK_CAPTURED3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void [[TMP7]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: ret void // // @@ -885,6 +925,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -903,16 +944,18 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[THIS]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -920,20 +963,22 @@ // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp --- a/clang/test/OpenMP/parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp @@ -332,18 +332,21 @@ // CHECK1-SAME: (ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], 
align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 @@ -352,107 +355,109 @@ // CHECK1-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP9]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = 
sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i16 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP14]] -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP20]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP16]] +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP22]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 -// CHECK1-NEXT: [[TMP24:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP24]] to i32 +// CHECK1-NEXT: [[TMP25:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32 +// CHECK1-NEXT: [[TMP26:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV3]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP22]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done7: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = 
icmp eq ptr [[ARRAYIDX]], [[TMP25]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK1: omp.arraycpy.body9: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP26:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK1-NEXT: [[CONV12:%.*]] = sext i16 [[TMP26]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]] monotonic, align 2 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP27:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP32:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i16 [[TMP27]], ptr [[_TMP13]], align 2 -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[_TMP13]], align 2 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK1-NEXT: [[TMP29:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP29]] to i32 +// CHECK1-NEXT: [[TMP29:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP34:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i16 [[TMP29]], ptr [[_TMP13]], align 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load i16, ptr [[_TMP13]], align 2 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP30]] to i32 +// CHECK1-NEXT: [[TMP31:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP31]] to i32 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV14]], [[CONV15]] // CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i16 // CHECK1-NEXT: store i16 [[CONV17]], ptr [[ATOMIC_TEMP]], align 2 -// CHECK1-NEXT: [[TMP30:%.*]] = load i16, ptr [[ATOMIC_TEMP]], align 2 -// CHECK1-NEXT: [[TMP31:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP27]], i16 [[TMP30]] monotonic monotonic, align 2 -// CHECK1-NEXT: [[TMP32]] = extractvalue { i16, i1 } [[TMP31]], 0 -// CHECK1-NEXT: [[TMP33:%.*]] = extractvalue { i16, i1 } [[TMP31]], 1 -// CHECK1-NEXT: br i1 [[TMP33]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i16, ptr [[ATOMIC_TEMP]], align 2 +// CHECK1-NEXT: [[TMP33:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP29]], i16 [[TMP32]] monotonic monotonic, align 2 +// CHECK1-NEXT: [[TMP34]] = extractvalue { i16, i1 } [[TMP33]], 0 +// CHECK1-NEXT: [[TMP35:%.*]] = extractvalue { i16, i1 } [[TMP33]], 1 +// CHECK1-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP25]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP27]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY9]] // CHECK1: omp.arraycpy.done21: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP34]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP36]]) // CHECK1-NEXT: ret void // // @@ -506,6 +511,9 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[CF:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(16) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -517,32 +525,58 @@ // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..1, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: br label [[IF_END]] // CHECK1: if.end: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[CF]]) -// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK1-NEXT: store i32 [[CALL1]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CF]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: store i32 [[CALL3]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[IF_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[IF_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP14]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -582,151 +616,148 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, 
align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP7:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP6]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call 
void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP13]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR3]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP12]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP14]], [[TMP15]] -// 
CHECK1-NEXT: store float [[ADD]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL7:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL7]], 0.000000e+00 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store float [[ADD]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL2]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL9:%.*]] = fcmp une float [[CALL8]], 0.000000e+00 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL4:%.*]] = fcmp une float [[CALL3]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP16:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL9]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = uitofp i1 [[TMP16]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV10]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV5:%.*]] = uitofp i1 [[TMP23]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV5]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: 
cond.false: -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw fadd ptr [[TMP1]], float [[TMP21]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[CALL11]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]] -// CHECK1: land.rhs15: -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END18]] -// CHECK1: land.end18: -// CHECK1-NEXT: [[TMP23:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ] -// CHECK1-NEXT: [[CONV19:%.*]] = uitofp i1 [[TMP23]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[REF_TMP12]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP5]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw fadd ptr [[TMP4]], float [[TMP28]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull 
align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP8]], ptr align 4 [[CALL6]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL8:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL9:%.*]] = fcmp une float [[CALL8]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL9]], label [[LAND_RHS10:%.*]], label [[LAND_END13:%.*]] +// CHECK1: land.rhs10: +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL12:%.*]] = fcmp une float [[CALL11]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END13]] +// CHECK1: land.end13: +// CHECK1-NEXT: [[TMP30:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL12]], [[LAND_RHS10]] ] +// CHECK1-NEXT: [[CONV14:%.*]] = uitofp i1 [[TMP30]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP7]], float noundef [[CONV14]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[REF_TMP7]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP7]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP12]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP25:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP33:%.*]], [[COND_END23:%.*]] ] -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float -// CHECK1-NEXT: store float [[TMP26]], ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP20:%.*]] = fcmp olt float [[TMP27]], [[TMP28]] -// CHECK1-NEXT: br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]] -// CHECK1: cond.true21: -// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: br label [[COND_END23]] -// CHECK1: cond.false22: -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END23]] -// CHECK1: cond.end23: -// CHECK1-NEXT: [[COND24:%.*]] = phi float [ [[TMP29]], [[COND_TRUE21]] ], [ [[TMP30]], [[COND_FALSE22]] ] -// CHECK1-NEXT: store float [[COND24]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = cmpxchg ptr [[TMP5]], i32 [[TMP25]], i32 [[TMP31]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP33]] = extractvalue { i32, i1 } [[TMP32]], 0 -// CHECK1-NEXT: [[TMP34:%.*]] = extractvalue { i32, i1 } [[TMP32]], 1 -// CHECK1-NEXT: br i1 [[TMP34]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP32:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END13]] ], [ [[TMP40:%.*]], [[COND_END18:%.*]] ] +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float +// CHECK1-NEXT: store float [[TMP33]], ptr 
[[TMP]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP15:%.*]] = fcmp olt float [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]] +// CHECK1: cond.true16: +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END18]] +// CHECK1: cond.false17: +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END18]] +// CHECK1: cond.end18: +// CHECK1-NEXT: [[COND19:%.*]] = phi float [ [[TMP36]], [[COND_TRUE16]] ], [ [[TMP37]], [[COND_FALSE17]] ] +// CHECK1-NEXT: store float [[COND19]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = cmpxchg ptr [[TMP12]], i32 [[TMP32]], i32 [[TMP38]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP40]] = extractvalue { i32, i1 } [[TMP39]], 0 +// CHECK1-NEXT: [[TMP41:%.*]] = extractvalue { i32, i1 } [[TMP39]], 1 +// CHECK1-NEXT: br i1 [[TMP41]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -822,102 +853,101 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[WHILE_COND:%.*]] // CHECK1: while.cond: // CHECK1-NEXT: br label [[WHILE_BODY:%.*]] // CHECK1: while.body: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP6]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP13]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR3]], i64 4, i1 false) +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP5:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CF:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CF_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CF1:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CF:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[ATOMIC_TEMP9:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CF]], ptr [[CF_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CF_ADDR]], align 8 -// CHECK1-NEXT: [[CF1_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF1_REALP]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF1_IMAGP]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[CF1]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[CF_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF_REALP]], align 4 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF_IMAGP]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 
+// CHECK1-NEXT: store ptr [[CF]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4 -// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 1 // CHECK1-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4 -// CHECK1-NEXT: [[CF1_REALP2:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL:%.*]] = load float, ptr [[CF1_REALP2]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP3:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG:%.*]] = load float, ptr [[CF1_IMAGP3]], align 4 -// CHECK1-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[CF1_REAL]] -// CHECK1-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[CF1_IMAG]] -// CHECK1-NEXT: [[DOTREALP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[DOTIMAGP5:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1 -// CHECK1-NEXT: store float [[ADD_R]], ptr [[DOTREALP4]], align 4 -// CHECK1-NEXT: store float [[ADD_I]], ptr [[DOTIMAGP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[CF_REALP1:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL:%.*]] = load float, ptr [[CF_REALP1]], align 4 +// CHECK1-NEXT: [[CF_IMAGP2:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG:%.*]] = load float, ptr [[CF_IMAGP2]], align 4 +// CHECK1-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[CF_REAL]] +// CHECK1-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[CF_IMAG]] +// CHECK1-NEXT: [[DOTREALP3:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTIMAGP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 1 +// CHECK1-NEXT: store float [[ADD_R]], ptr [[DOTREALP3]], align 4 +// CHECK1-NEXT: store float [[ADD_I]], ptr [[DOTIMAGP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[CF1_REALP6:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL7:%.*]] = load float, ptr [[CF1_REALP6]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP8:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: 
[[CF1_IMAG9:%.*]] = load float, ptr [[CF1_IMAGP8]], align 4 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 8, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) +// CHECK1-NEXT: [[CF_REALP5:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL6:%.*]] = load float, ptr [[CF_REALP5]], align 4 +// CHECK1-NEXT: [[CF_IMAGP7:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG8:%.*]] = load float, ptr [[CF_IMAGP7]], align 4 +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 8, ptr noundef [[TMP2]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: // CHECK1-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP]], i32 0, i32 0 @@ -928,21 +958,21 @@ // CHECK1-NEXT: [[TMP_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 1 // CHECK1-NEXT: store float [[ATOMIC_TEMP_REAL]], ptr [[TMP_REALP]], align 4 // CHECK1-NEXT: store float [[ATOMIC_TEMP_IMAG]], ptr [[TMP_IMAGP]], align 4 -// CHECK1-NEXT: [[TMP_REALP11:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP_REAL:%.*]] = load float, ptr [[TMP_REALP11]], align 4 -// CHECK1-NEXT: [[TMP_IMAGP12:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP_IMAG:%.*]] = load float, ptr [[TMP_IMAGP12]], align 4 -// CHECK1-NEXT: [[CF1_REALP13:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL14:%.*]] = load float, ptr [[CF1_REALP13]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP15:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG16:%.*]] = load float, ptr [[CF1_IMAGP15]], align 4 -// CHECK1-NEXT: [[ADD_R17:%.*]] = fadd float [[TMP_REAL]], [[CF1_REAL14]] -// CHECK1-NEXT: [[ADD_I18:%.*]] = fadd float [[TMP_IMAG]], [[CF1_IMAG16]] -// CHECK1-NEXT: [[ATOMIC_TEMP10_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP10]], i32 0, i32 0 -// CHECK1-NEXT: [[ATOMIC_TEMP10_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP10]], i32 0, i32 1 -// CHECK1-NEXT: store float [[ADD_R17]], ptr [[ATOMIC_TEMP10_REALP]], align 4 -// CHECK1-NEXT: store float [[ADD_I18]], ptr [[ATOMIC_TEMP10_IMAGP]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 8, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP10]], i32 noundef 0, i32 noundef 0) +// CHECK1-NEXT: [[TMP_REALP10:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP_REAL:%.*]] = load float, ptr [[TMP_REALP10]], align 4 +// CHECK1-NEXT: [[TMP_IMAGP11:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP_IMAG:%.*]] = load float, ptr [[TMP_IMAGP11]], align 4 +// CHECK1-NEXT: [[CF_REALP12:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL13:%.*]] = load float, ptr [[CF_REALP12]], align 4 +// CHECK1-NEXT: [[CF_IMAGP14:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG15:%.*]] = load float, ptr [[CF_IMAGP14]], align 4 +// CHECK1-NEXT: [[ADD_R16:%.*]] = fadd float [[TMP_REAL]], [[CF_REAL13]] +// CHECK1-NEXT: [[ADD_I17:%.*]] = fadd float [[TMP_IMAG]], [[CF_IMAG15]] +// CHECK1-NEXT: [[ATOMIC_TEMP9_REALP:%.*]] = 
getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[ATOMIC_TEMP9_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP9]], i32 0, i32 1 +// CHECK1-NEXT: store float [[ADD_R16]], ptr [[ATOMIC_TEMP9_REALP]], align 4 +// CHECK1-NEXT: store float [[ADD_I17]], ptr [[ATOMIC_TEMP9_IMAGP]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 8, ptr noundef [[TMP2]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP9]], i32 noundef 0, i32 noundef 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] @@ -985,41 +1015,54 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..8, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1030,6 +1073,7 @@ // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C5:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1048,14 +1092,22 @@ // CHECK1-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C6]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[C5]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C5]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..6, ptr [[THIS1]], ptr [[TMP2]], ptr [[B4]], ptr [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B4]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C5]], align 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 // CHECK1-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK1-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK1-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B7]], align 4 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK1-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK1-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET10]], ptr [[B7]], align 4 @@ -1063,87 +1115,86 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca 
ptr, align 8 -// CHECK1-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[A2]], align 4 -// CHECK1-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[B4]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[C5]], align 4 -// CHECK1-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK1-NEXT: store i32 [[DEC]], ptr [[B4]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A2]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B4]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[C5]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], 
i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// 
CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP23]] monotonic, align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP27]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1258,124 +1309,121 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// 
CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP11:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_4]], align 4 +// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca [[STRUCT_S_4]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 128 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR15]], align 128 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR2]], align 128 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 128 [[VAR3]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP12]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr 
@.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 128 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 128 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] 
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP1]], align 128 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP3]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL7]], 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 128 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 128 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL9:%.*]] = icmp ne i32 [[CALL8]], 0 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP16:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL9]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP23:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP23]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP10]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 128 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR15]], align 128 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 128 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP5]], align 128 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP12]], align 128 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR15]], align 128 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR1]], align 128 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP5]], align 128 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP12]], align 128 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 128 -// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP21]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP3]], ptr align 4 [[CALL10]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[CALL12]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END17:%.*]] -// CHECK1: land.rhs14: -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL16:%.*]] = icmp ne i32 [[CALL15]], 0 -// CHECK1-NEXT: br label [[LAND_END17]] -// CHECK1: land.end17: -// CHECK1-NEXT: [[TMP23:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL16]], [[LAND_RHS14]] ] -// CHECK1-NEXT: [[CONV18:%.*]] = zext i1 [[TMP23]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]], i32 noundef [[CONV18]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 4 [[REF_TMP11]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR15]], align 128 -// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw min ptr [[TMP5]], i32 [[TMP24]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP28]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[CALL5]], i64 4, i1 false) +// CHECK1-NEXT: call 
void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[CALL7]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[LAND_RHS9:%.*]], label [[LAND_END12:%.*]] +// CHECK1: land.rhs9: +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[CALL10]], 0 +// CHECK1-NEXT: br label [[LAND_END12]] +// CHECK1: land.end12: +// CHECK1-NEXT: [[TMP30:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL11]], [[LAND_RHS9]] ] +// CHECK1-NEXT: [[CONV13:%.*]] = zext i1 [[TMP30]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP6]], i32 noundef [[CONV13]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP10]], ptr align 4 [[REF_TMP6]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP12]], i32 [[TMP31]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -1384,7 +1432,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 @@ -1476,7 +1524,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1487,76 +1535,82 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 
8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..10, ptr [[THIS1]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store i32 1, ptr [[A1]], align 4 -// CHECK1-NEXT: store ptr [[A1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store i32 1, ptr [[A]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i32 [[MUL]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[MUL]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP5]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP12:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP17:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[_TMP3]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[_TMP3]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP13]], [[TMP14]] -// 
CHECK1-NEXT: store i32 [[MUL4]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP12]], i32 [[TMP15]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP17]] = extractvalue { i32, i1 } [[TMP16]], 0 -// CHECK1-NEXT: [[TMP18:%.*]] = extractvalue { i32, i1 } [[TMP16]], 1 -// CHECK1-NEXT: br i1 [[TMP18]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP15:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP20:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = cmpxchg ptr [[TMP5]], i32 [[TMP15]], i32 [[TMP18]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP20]] = extractvalue { i32, i1 } [[TMP19]], 0 +// CHECK1-NEXT: [[TMP21:%.*]] = extractvalue { i32, i1 } [[TMP19]], 1 +// CHECK1-NEXT: br i1 [[TMP21]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -1591,7 +1645,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1612,18 +1666,21 @@ // CHECK3-SAME: (ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 @@ -1632,107 +1689,109 @@ // CHECK3-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 -// CHECK3-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK3-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK3-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK3-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave() -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8 -// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK3-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP6]] -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP9]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 0 +// CHECK3-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK3-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK3-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK3-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK3-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 +// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK3-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr i16, ptr [[VLA]], i64 [[TMP8]] +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i16 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK3-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP14]] -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP20]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK3-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK3-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP16]] +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP22]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP22]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP23:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 -// CHECK3-NEXT: [[TMP24:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP24]] to i32 +// CHECK3-NEXT: [[TMP25:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32 +// CHECK3-NEXT: [[TMP26:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV3]] // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 // CHECK3-NEXT: store i16 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP22]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done7: -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP25]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK3: omp.arraycpy.body9: // CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX]], 
[[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[ATOMIC_EXIT]] ] -// CHECK3-NEXT: [[TMP26:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP26]] to i32 +// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP28]] to i32 // CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]] monotonic, align 2 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP27:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP32:%.*]], [[ATOMIC_CONT]] ] -// CHECK3-NEXT: store i16 [[TMP27]], ptr [[_TMP13]], align 2 -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[_TMP13]], align 2 -// CHECK3-NEXT: [[CONV14:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK3-NEXT: [[TMP29:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP29]] to i32 +// CHECK3-NEXT: [[TMP29:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP34:%.*]], [[ATOMIC_CONT]] ] +// CHECK3-NEXT: store i16 [[TMP29]], ptr [[_TMP13]], align 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load i16, ptr [[_TMP13]], align 2 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i16 [[TMP30]] to i32 +// CHECK3-NEXT: [[TMP31:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP31]] to i32 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV14]], [[CONV15]] // CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i16 // CHECK3-NEXT: store i16 [[CONV17]], ptr [[ATOMIC_TEMP]], align 2 -// CHECK3-NEXT: [[TMP30:%.*]] = load i16, ptr [[ATOMIC_TEMP]], align 2 -// CHECK3-NEXT: [[TMP31:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP27]], i16 [[TMP30]] monotonic monotonic, align 2 -// CHECK3-NEXT: [[TMP32]] = extractvalue { i16, i1 } [[TMP31]], 0 -// CHECK3-NEXT: [[TMP33:%.*]] = extractvalue { i16, i1 } [[TMP31]], 1 -// CHECK3-NEXT: br i1 [[TMP33]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i16, ptr [[ATOMIC_TEMP]], align 2 +// CHECK3-NEXT: [[TMP33:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP29]], i16 [[TMP32]] monotonic monotonic, align 2 +// CHECK3-NEXT: [[TMP34]] = extractvalue { i16, i1 } [[TMP33]], 0 +// CHECK3-NEXT: [[TMP35:%.*]] = extractvalue { i16, i1 } [[TMP33]], 1 +// CHECK3-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP25]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP27]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY9]] // CHECK3: omp.arraycpy.done21: // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK3-NEXT: call void @llvm.stackrestore(ptr [[TMP34]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK3-NEXT: call void @llvm.stackrestore(ptr [[TMP36]]) // CHECK3-NEXT: ret void // // @@ -1806,6 +1865,7 @@ // 
CHECK3-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C5:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1824,14 +1884,22 @@ // CHECK3-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C6]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[C5]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C5]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[THIS1]], ptr [[TMP2]], ptr [[B4]], ptr [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C5]], align 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 // CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK3-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK3-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B7]], align 4 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK3-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK3-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET10]], ptr [[B7]], align 4 @@ -1839,88 +1907,87 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[A2]], align 4 -// CHECK3-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[_TMP1]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[C5]], align 4 -// CHECK3-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[A2]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK3-NEXT: store ptr [[C5]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP16]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP24]] monotonic, align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP26]] monotonic, align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP28]] monotonic, align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP29]] monotonic, align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP31]] monotonic, align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP33]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1930,32 +1997,41 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR0]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 // CHECK3-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP1]], ptr [[TMP12]], ptr [[TMP14]], ptr [[TMP16]]) +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // @@ -1996,87 +2072,86 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store i32 -1, ptr [[A2]], align 4 -// CHECK3-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store i32 -1, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store i32 -1, ptr [[C5]], align 4 -// CHECK3-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[A2]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK3-NEXT: store ptr [[C5]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store i32 -1, ptr [[A]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store i32 -1, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store i32 -1, ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: store 
i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[AND]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[AND7:%.*]] = and i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[AND7]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK3-NEXT: [[AND8:%.*]] = and i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: store i32 [[AND8]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[AND]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[AND4:%.*]] = and i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[AND4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[AND5:%.*]] = and i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[AND5]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw and ptr [[TMP4]], i32 [[TMP23]] monotonic, align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw and ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw and ptr [[TMP5]], i32 [[TMP27]] monotonic, align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw and ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = atomicrmw and ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2119,42 +2194,44 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[G]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP3]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds 
[1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP5]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[G1]], align 128 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 128 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[G]], align 128 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 128 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[G1]], align 128 -// CHECK3-NEXT: [[TMP9:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP8]] monotonic, align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[G]], align 128 +// CHECK3-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP10]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2184,18 +2261,21 @@ // CHECK4-SAME: (ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 @@ -2204,107 +2284,109 @@ // CHECK4-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 -// CHECK4-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK4-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK4-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK4-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK4-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK4-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK4-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave() -// CHECK4-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8 -// CHECK4-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK4-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP6]] -// CHECK4-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP9]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 0 +// CHECK4-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK4-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK4-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK4-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK4-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK4-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() +// CHECK4-NEXT: store ptr [[TMP10]], ptr 
[[SAVED_STACK]], align 8 +// CHECK4-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK4-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP8]] +// CHECK4-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK4: omp.arrayinit.body: // CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK4-NEXT: store i16 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK4: omp.arrayinit.done: -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK4-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP14]] -// CHECK4-NEXT: store ptr [[TMP15]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK4-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP20]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK4-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK4-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP16]] +// CHECK4-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK4-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load 
i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP22]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] -// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP22]] +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK4: omp.arraycpy.body: // CHECK4-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK4-NEXT: [[TMP23:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 -// CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 -// CHECK4-NEXT: [[TMP24:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP24]] to i32 +// CHECK4-NEXT: [[TMP25:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 +// CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP25]] to i32 +// CHECK4-NEXT: [[TMP26:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV3]] // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD]] to i16 // CHECK4-NEXT: store i16 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 2 // CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK4-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP22]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK4: omp.arraycpy.done7: -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] -// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP25]] +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK4: omp.arraycpy.body9: // CHECK4-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi 
ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[ATOMIC_EXIT:%.*]] ] // CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[ATOMIC_EXIT]] ] -// CHECK4-NEXT: [[TMP26:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP26]] to i32 +// CHECK4-NEXT: [[TMP28:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP28]] to i32 // CHECK4-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]] monotonic, align 2 // CHECK4-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK4: atomic_cont: -// CHECK4-NEXT: [[TMP27:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP32:%.*]], [[ATOMIC_CONT]] ] -// CHECK4-NEXT: store i16 [[TMP27]], ptr [[_TMP13]], align 2 -// CHECK4-NEXT: [[TMP28:%.*]] = load i16, ptr [[_TMP13]], align 2 -// CHECK4-NEXT: [[CONV14:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK4-NEXT: [[TMP29:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 -// CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP29]] to i32 +// CHECK4-NEXT: [[TMP29:%.*]] = phi i16 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY9]] ], [ [[TMP34:%.*]], [[ATOMIC_CONT]] ] +// CHECK4-NEXT: store i16 [[TMP29]], ptr [[_TMP13]], align 2 +// CHECK4-NEXT: [[TMP30:%.*]] = load i16, ptr [[_TMP13]], align 2 +// CHECK4-NEXT: [[CONV14:%.*]] = sext i16 [[TMP30]] to i32 +// CHECK4-NEXT: [[TMP31:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], align 2 +// CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP31]] to i32 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV14]], [[CONV15]] // CHECK4-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i16 // CHECK4-NEXT: store i16 [[CONV17]], ptr [[ATOMIC_TEMP]], align 2 -// CHECK4-NEXT: [[TMP30:%.*]] = load i16, ptr [[ATOMIC_TEMP]], align 2 -// CHECK4-NEXT: [[TMP31:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP27]], i16 [[TMP30]] monotonic monotonic, align 2 -// CHECK4-NEXT: [[TMP32]] = extractvalue { i16, i1 } [[TMP31]], 0 -// CHECK4-NEXT: [[TMP33:%.*]] = extractvalue { i16, i1 } [[TMP31]], 1 -// CHECK4-NEXT: br i1 [[TMP33]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK4-NEXT: [[TMP32:%.*]] = load i16, ptr [[ATOMIC_TEMP]], align 2 +// CHECK4-NEXT: [[TMP33:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i16 [[TMP29]], i16 [[TMP32]] monotonic monotonic, align 2 +// CHECK4-NEXT: [[TMP34]] = extractvalue { i16, i1 } [[TMP33]], 0 +// CHECK4-NEXT: [[TMP35:%.*]] = extractvalue { i16, i1 } [[TMP33]], 1 +// CHECK4-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK4: atomic_exit: // CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK4-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP25]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP27]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY9]] // CHECK4: omp.arraycpy.done21: // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: [[TMP34:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK4-NEXT: call void @llvm.stackrestore(ptr [[TMP34]]) +// 
CHECK4-NEXT: [[TMP36:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK4-NEXT: call void @llvm.stackrestore(ptr [[TMP36]]) // CHECK4-NEXT: ret void // // @@ -2375,27 +2457,32 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr @g) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, align 128 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[G1]], align 128 -// CHECK4-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[G]], align 128 +// CHECK4-NEXT: store i32 1, ptr [[G]], align 128 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2407,30 +2494,30 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.2, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// 
CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP1]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK4-NEXT: call void [[TMP3]](ptr noundef [[BLOCK]]) -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[G1]], ptr [[TMP4]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.3, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP7]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP3]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK4-NEXT: call void [[TMP5]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[G]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.3, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[G1]], align 128 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 128 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[G]], align 128 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 128 +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[G1]], align 128 -// CHECK4-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP10]] monotonic, align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[G]], align 128 +// CHECK4-NEXT: [[TMP13:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP12]] monotonic, align 4 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: // CHECK4-NEXT: ret void @@ -2476,6 +2563,7 @@ // CHECK4-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: 
[[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C5:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2494,14 +2582,22 @@ // CHECK4-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C6]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C5]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C5]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[THIS1]], ptr [[TMP2]], ptr [[B4]], ptr [[TMP3]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C5]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 // CHECK4-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK4-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK4-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B7]], align 4 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK4-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK4-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET10]], ptr [[B7]], align 4 @@ -2509,42 +2605,41 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, align 8 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[A2]], align 4 -// CHECK4-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[B4]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store i32 
0, ptr [[C5]], align 4
-// CHECK4-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8
+// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1
+// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2
+// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3
+// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8
+// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8
+// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[A]], align 4
+// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[B]], align 4
+// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[C]], align 4
+// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8
 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0
 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8
 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1
@@ -2556,54 +2651,54 @@
 // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4
 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.7, ptr [[BLOCK_DESCRIPTOR]], align 8
 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5
-// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8
+// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8
 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6
-// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK4-NEXT: store ptr [[TMP6]], ptr [[BLOCK_CAPTURED]], align 8
-// CHECK4-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4
-// CHECK4-NEXT: store i32 [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8
-// CHECK4-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7
-// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP6]], align 8
-// CHECK4-NEXT: store ptr [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8
-// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3
-// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK4-NEXT: call void [[TMP10]](ptr noundef [[BLOCK]])
-// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK4-NEXT: store ptr [[A2]], ptr [[TMP11]], align 8
-// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP12]], align 8
-// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
-// CHECK4-NEXT: store ptr [[C5]], ptr [[TMP13]], align 8
-// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var)
-// CHECK4-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK4-NEXT: store ptr [[TMP11]], ptr [[BLOCK_CAPTURED]], align 8
+// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4
+// CHECK4-NEXT: store i32 [[TMP12]], ptr [[BLOCK_CAPTURED4]], align 8
+// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7
+// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8
+// CHECK4-NEXT: store ptr [[TMP13]], ptr [[BLOCK_CAPTURED5]], align 8
+// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3
+// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8
+// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]])
+// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK4-NEXT: store ptr [[A]], ptr [[TMP16]], align 8
+// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK4-NEXT: store ptr [[B]], ptr [[TMP17]], align 8
+// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
+// CHECK4-NEXT: store ptr [[C]], ptr [[TMP18]], align 8
+// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK4-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var)
+// CHECK4-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK4-NEXT: ]
 // CHECK4: .omp.reduction.case1:
-// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4
-// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[B4]], align 4
-// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK4-NEXT: store i32 [[ADD9]], ptr [[TMP2]], align 4
-// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[C5]], align 4
-// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK4-NEXT: store i32 [[ADD10]], ptr [[TMP5]], align 4
-// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4
+// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4
+// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4
+// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK4-NEXT: store i32 [[ADD6]], ptr [[TMP6]], align 4
+// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4
+// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
+// CHECK4-NEXT: store i32 [[ADD7]], ptr [[TMP10]], align 4
+// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4: .omp.reduction.case2:
-// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK4-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP23]] monotonic, align 4
-// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[B4]], align 4
-// CHECK4-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4
-// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[C5]], align 4
-// CHECK4-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP27]] monotonic, align 4
+// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4
+// CHECK4-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4
+// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4
+// CHECK4-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4
+// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[C]], align 4
+// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4
 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4: .omp.reduction.default:
 // CHECK4-NEXT: ret void
@@ -2614,6 +2709,7 @@
 // CHECK4-NEXT: entry:
 // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8
+// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8
 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8
 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5
@@ -2632,97 +2728,104 @@
 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1
 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4
+// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8
+// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6
-// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8
+// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8
+// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8
+// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8
+// CHECK4-NEXT: store ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP8]], align 8
+// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7
-// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8
-// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[THIS]], ptr [[TMP5]], ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP6]])
+// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8
+// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8
+// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]])
 // CHECK4-NEXT: ret void
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..5
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] {
+// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] {
 // CHECK4-NEXT: entry:
 // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
+// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[A2:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
+// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4
 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[C5:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8
 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8
 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8
-// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK4-NEXT: store i32 0, ptr [[A2]], align 4
-// CHECK4-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8
-// CHECK4-NEXT: store i32 0, ptr [[B4]], align 4
-// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8
-// CHECK4-NEXT: store i32 0, ptr [[C5]], align 4
-// CHECK4-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8
-// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
-// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4
-// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1
-// CHECK4-NEXT: store i32 [[DEC]], ptr [[B4]], align 4
-// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP6]], align 8
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
-// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1
-// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4
-// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK4-NEXT: store ptr [[A2]], ptr [[TMP11]], align 8
-// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP12]], align 8
-// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
-// CHECK4-NEXT: store ptr [[C5]], ptr [[TMP13]], align 8
-// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1
+// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2
+// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3
+// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
+// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8
+// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8
+// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[A]], align 4
+// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[B]], align 4
+// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[C]], align 4
+// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8
+// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
+// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4
+// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1
+// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4
+// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8
 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
-// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
-// CHECK4-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1
+// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4
+// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK4-NEXT: store ptr [[A]], ptr [[TMP16]], align 8
+// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK4-NEXT: store ptr [[B]], ptr [[TMP17]], align 8
+// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
+// CHECK4-NEXT: store ptr [[C]], ptr [[TMP18]], align 8
+// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+// CHECK4-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var)
+// CHECK4-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK4-NEXT: ]
 // CHECK4: .omp.reduction.case1:
-// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4
-// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[B4]], align 4
-// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK4-NEXT: store i32 [[ADD7]], ptr [[TMP2]], align 4
-// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[C5]], align 4
-// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK4-NEXT: store i32 [[ADD8]], ptr [[TMP5]], align 4
-// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4
+// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4
+// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4
+// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4
+// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK4-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4
+// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4
+// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
+// CHECK4-NEXT: store i32 [[ADD5]], ptr [[TMP10]], align 4
+// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4: .omp.reduction.case2:
-// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK4-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP23]] monotonic, align 4
-// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[B4]], align 4
-// CHECK4-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4
-// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[C5]], align 4
-// CHECK4-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP27]] monotonic, align 4
+// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4
+// CHECK4-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4
+// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4
+// CHECK4-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4
+// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[C]], align 4
+// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4
 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK4: .omp.reduction.default:
 // CHECK4-NEXT: ret void
diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
--- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
@@ -40,229 +40,235 @@
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
 // CHECK1-NEXT: ret i32 0
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8
 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8
 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8
 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1
-// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1
+// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
-// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]]
-// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
-// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]]
-// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1
-// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave()
-// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8
-// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16
-// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]]
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]]
+// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64
+// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
+// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]]
+// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1
+// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave()
+// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8
+// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16
+// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]]
 // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
 // CHECK1: omp.arrayinit.body:
 // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
 // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1
 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]]
 // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
 // CHECK1: omp.arrayinit.done:
-// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64
-// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]]
-// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]]
-// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64
+// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64
+// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]]
+// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]]
+// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8
+// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8
 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
-// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false)
-// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
-// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]]
-// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9
-// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]]
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64
-// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64
-// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1
-// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2
-// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8
-// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3
-// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4
-// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8
-// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5
-// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6
-// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8
-// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 0, i32 2, ptr [[DOTRD_INPUT_]])
-// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP51]], align 8
-// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP52]], align 8
-// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
-// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP]], align 8
-// CHECK1-NEXT: store ptr [[TMP54]], ptr [[TMP53]], align 8
-// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4
-// CHECK1-NEXT: [[TMP57:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP56]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
-// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP57]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP58]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP59]], align 8
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP60]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
-// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP57]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP61]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
-// CHECK1-NEXT: store ptr [[TMP63]], ptr [[TMP62]], align 8
-// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4
-// CHECK1-NEXT: [[TMP66:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP65]], ptr [[TMP57]])
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2
+// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6
+// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false)
+// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0
+// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
+// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]]
+// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9
+// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]]
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64
+// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64
+// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]]
+// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1
+// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3
+// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6
+// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8
+// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4
+// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]])
+// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP54]], align 8
+// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP55]], align 8
+// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP56]], align 8
+// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4
+// CHECK1-NEXT: [[TMP60:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP59]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.)
+// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP60]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP61]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP63]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false)
+// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP60]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP64]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8
+// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8
 // CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4
-// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP68]], i32 0)
-// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP69]], align 8
-// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP70]], align 8
-// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
-// CHECK1-NEXT: [[TMP72:%.*]] = inttoptr i64 [[TMP11]] to ptr
-// CHECK1-NEXT: store ptr [[TMP72]], ptr [[TMP71]], align 8
-// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4
-// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP74]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP75]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP69:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP68]], ptr [[TMP60]])
+// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4
+// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP71]], i32 0)
+// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP72]], align 8
+// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8
+// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP14]] to ptr
+// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8
+// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4
+// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP78]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
 // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
 // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
 // CHECK1-NEXT: ]
 // CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP76]], [[TMP77]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP78]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[ARGC]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP79]], [[TMP80]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4
+// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP81]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
 // CHECK1: omp.arraycpy.body:
 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP79:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1
-// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP79]] to i32
-// CHECK1-NEXT: [[TMP80:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
-// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP80]] to i32
-// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]]
-// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8
-// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1
+// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP82]] to i32
+// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1
+// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP83]] to i32
+// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]]
+// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8
+// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1
 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP78]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]]
-// CHECK1: omp.arraycpy.done18:
-// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP74]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP81]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK1: omp.arraycpy.done17:
+// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var)
 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4
-// CHECK1-NEXT: [[TMP82:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP81]] monotonic, align 4
-// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP83]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]]
-// CHECK1: omp.arraycpy.body20:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ]
-// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1
-// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP84]] to i32
-// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1
+// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4
+// CHECK1-NEXT: [[TMP85:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP84]] monotonic, align 4
+// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP86]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]]
+// CHECK1: omp.arraycpy.body19:
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ]
+// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1
+// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP87]] to i32
+// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1
 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]]
 // CHECK1: atomic_cont:
-// CHECK1-NEXT: [[TMP85:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP90:%.*]], [[ATOMIC_CONT]] ]
-// CHECK1-NEXT: store i8 [[TMP85]], ptr [[_TMP24]], align 1
-// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[_TMP24]], align 1
-// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP86]] to i32
-// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1
-// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP87]] to i32
-// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]]
-// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8
-// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
-// CHECK1-NEXT: [[TMP89:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP85]], i8 [[TMP88]] monotonic monotonic, align 1
-// CHECK1-NEXT: [[TMP90]] = extractvalue { i8, i1 } [[TMP89]], 0
-// CHECK1-NEXT: [[TMP91:%.*]] = extractvalue { i8, i1 } [[TMP89]], 1
-// CHECK1-NEXT: br i1 [[TMP91]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
+// CHECK1-NEXT: [[TMP88:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP93:%.*]], [[ATOMIC_CONT]] ]
+// CHECK1-NEXT: store i8 [[TMP88]], ptr [[_TMP23]], align 1
+// CHECK1-NEXT: [[TMP89:%.*]] = load i8, ptr [[_TMP23]], align 1
+// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP89]] to i32
+// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1
+// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP90]] to i32
+// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]]
+// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8
+// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1
+// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
+// CHECK1-NEXT: [[TMP92:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP88]], i8 [[TMP91]] monotonic monotonic, align 1
+// CHECK1-NEXT: [[TMP93]] = extractvalue { i8, i1 } [[TMP92]], 0
+// CHECK1-NEXT: [[TMP94:%.*]] = extractvalue { i8, i1 } [[TMP92]], 1
+// CHECK1-NEXT: br i1 [[TMP94]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]]
 // CHECK1: atomic_exit:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP83]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]]
-// CHECK1: omp.arraycpy.done32:
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP86]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]]
+// CHECK1: omp.arraycpy.done31:
 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
 // CHECK1: .omp.reduction.default:
-// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
-// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP92]])
+// CHECK1-NEXT: [[TMP95:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP95]])
 // CHECK1-NEXT: ret void
 //
 //
@@ -401,20 +407,20 @@
 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12
 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]]
 // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1
 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8
 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12
 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]])
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1
 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]]
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8
 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9
 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8
@@ -428,7 +434,7 @@
 // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12
 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8
 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]])
-// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2
 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8
 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8
 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64
diff --git a/clang/test/OpenMP/parallel_sections_codegen.cpp b/clang/test/OpenMP/parallel_sections_codegen.cpp
--- a/clang/test/OpenMP/parallel_sections_codegen.cpp
+++ b/clang/test/OpenMP/parallel_sections_codegen.cpp
@@ -57,17 +57,19 @@
 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK1-NEXT: ret i32 [[CALL]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 {
 // CHECK1-NEXT: entry:
 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4
@@ -75,28 +77,30 @@
 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4
 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4
 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4
 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4
 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1
-// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
+// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4
+// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4
 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sle
i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] @@ -113,18 +117,18 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP10:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i32 } [[TMP10]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP11]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -138,15 +142,17 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -154,28 +160,30 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: @@ -186,17 +194,17 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP10:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i32 } [[TMP10]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP11]]) #[[ATTR7]] +// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR7]] // CHECK1-NEXT: unreachable // diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -40,271 +40,277 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = icmp slt i32 [[TMP53]], 0 -// CHECK1-NEXT: [[TMP55:%.*]] = select i1 [[TMP54]], i32 [[TMP53]], i32 0 -// CHECK1-NEXT: store i32 [[TMP55]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP56]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = icmp slt i32 [[TMP56]], 0 +// CHECK1-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 0 +// CHECK1-NEXT: store i32 [[TMP58]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP59]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP57]], [[TMP58]] +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP60]], [[TMP61]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP59]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP62]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP63]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP65]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP66]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP69]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP66]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP70]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP72]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP74]], ptr [[TMP66]]) +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[TMP71]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP72]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP73]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP77]], ptr [[TMP69]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP76]], 1 +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP79]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP78]]) -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP80]], i32 1) -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP84:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP84]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP86]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP87]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP81]]) +// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP83]], i32 1) +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP87:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP89]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP90]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP88]], [[TMP89]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP91]], [[TMP92]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: 
[[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP86]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP89]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP93]] monotonic, align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP96]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP96]] monotonic, align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = 
sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP97:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP102:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP97]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP97]], i8 [[TMP100]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP102]] = extractvalue { i8, i1 } [[TMP101]], 0 -// CHECK1-NEXT: [[TMP103:%.*]] = extractvalue { i8, i1 } [[TMP101]], 1 -// CHECK1-NEXT: br i1 [[TMP103]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP100:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP105:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP100]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP100]], i8 [[TMP103]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP105]] = extractvalue { i8, i1 } [[TMP104]], 0 +// CHECK1-NEXT: [[TMP106:%.*]] = extractvalue { i8, i1 } [[TMP104]], 1 +// CHECK1-NEXT: br i1 [[TMP106]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP104:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void 
@llvm.stackrestore(ptr [[TMP104]]) +// CHECK1-NEXT: [[TMP107:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP107]]) // CHECK1-NEXT: ret void // // @@ -443,20 +449,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -470,7 +476,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/reduction_compound_op.cpp b/clang/test/OpenMP/reduction_compound_op.cpp --- a/clang/test/OpenMP/reduction_compound_op.cpp +++ b/clang/test/OpenMP/reduction_compound_op.cpp @@ -84,17 +84,73 @@ // NORM-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// NORM-NEXT: 
[[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // NORM-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 // NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]] -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..5, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..13, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) +// NORM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP2]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP5]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP7]], align 8 +// NORM-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP8]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP9]], align 8 +// NORM-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP10]], align 8 +// NORM-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP11]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_3]]) +// NORM-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 8 +// NORM-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP13]], align 8 +// NORM-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP14]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_4]]) +// NORM-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP15]], align 8 +// NORM-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP16]], align 8 +// NORM-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP17]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_5]]) +// NORM-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP18]], align 8 +// NORM-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP19]], align 8 +// NORM-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP20]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_6]]) +// NORM-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP21]], align 8 +// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP22]], align 8 +// NORM-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP23]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_7]]) // NORM-NEXT: ret void // // @@ -109,13 +165,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined. -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -125,107 +179,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 
0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// 
NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP24]], i32 1, 
i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr 
[[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -254,13 +310,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..1 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -270,107 +324,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], 
ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -399,13 +455,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..3 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -415,107 +469,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: 
[[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], 
align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -544,13 +600,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..5 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) 
[[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -560,107 +614,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: 
[[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label 
[[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -689,13 +745,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..7 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -705,107 +759,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], 
align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 
[[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull 
align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -834,13 +890,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..9 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -850,107 +904,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) 
#[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr 
[[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: 
call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -979,13 +1035,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..11 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -995,107 +1049,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, 
align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// 
NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr 
@[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -1124,13 +1180,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..13 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1140,107 +1194,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: 
[[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 
dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// NORM-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// NORM-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// NORM-NEXT: call void 
@__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -1287,17 +1343,73 @@ // COMP-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // COMP-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 // COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]] -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..5, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..13, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) +// COMP-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP2]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP5]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP7]], align 8 +// COMP-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP8]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP9]], align 8 +// COMP-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP10]], align 8 +// COMP-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP11]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_3]]) +// COMP-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 8 +// COMP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP13]], align 8 +// COMP-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP14]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_4]]) +// COMP-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP15]], align 8 +// COMP-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP16]], align 8 +// COMP-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP17]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_5]]) +// COMP-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP18]], align 8 +// COMP-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP19]], align 8 +// COMP-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP20]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_6]]) +// COMP-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP21]], align 8 +// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP22]], align 8 +// COMP-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP23]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_7]]) // COMP-NEXT: ret void // // @@ -1312,13 +1424,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined. 
-// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1328,101 +1438,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = 
icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label 
[[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1448,13 +1560,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..1 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1464,101 +1574,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 
4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr 
nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr 
@.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1584,13 +1696,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..3 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1600,101 +1710,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1720,13 +1832,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..5 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: 
[[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1736,101 +1846,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, 
ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 
-// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull 
align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1856,13 +1968,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..7 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1872,101 +1982,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// 
COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], 
align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr 
@.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1992,13 +2104,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..9 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2008,101 +2118,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr 
[[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// 
COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2128,13 +2240,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..11 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2144,107 +2254,109 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// 
COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// COMP-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// COMP-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // COMP-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], 
ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// COMP-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// COMP-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2273,13 +2385,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..13 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2289,107 +2399,109 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: [[REF_TMP:%.*]] = alloca 
[[STRUCT_POINT]], align 4 -// COMP-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// COMP-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 -// COMP-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// COMP-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // COMP-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 -// COMP-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// COMP-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// COMP-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -101,17 +101,23 @@ // CHECK-SAME: (ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[E_ADDR]], align 8 +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = load 
[[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) // CHECK-NEXT: ret void // CHECK: worker.exit: @@ -119,44 +125,46 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[E:%.*]] = alloca double, align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 0 -// CHECK-NEXT: store double 0.000000e+00, ptr [[E2]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[E2]], i64 [[TMP6]] -// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: store double 1.000000e+01, ptr [[TMP8]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[E2]], ptr [[TMP11]], align 8 -// CHECK-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) -// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0 +// CHECK-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[E]], i64 [[TMP8]] +// CHECK-NEXT: store ptr [[TMP9]], ptr [[TMP]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK-NEXT: store double 1.000000e+01, ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[E]], ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) +// CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +// CHECK-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK: .omp.reduction.then: -// CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[E2]], align 8 -// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP14]], [[TMP15]] +// CHECK-NEXT: [[TMP16:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[E]], align 8 +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] // CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX]], align 8 -// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP10]]) +// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP12]]) // CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK: .omp.reduction.done: // CHECK-NEXT: ret void @@ -409,67 +417,72 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[O1:%.*]] = alloca [[CLASS_S2:%.*]], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[O:%.*]] = alloca [[CLASS_S2:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[O1]]) -// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]] -// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr ([[CLASS_S2]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[CLASS_S2]], ptr [[O1]], i64 [[TMP4]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[O]]) +// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr ([[CLASS_S2]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[CLASS_S2]], ptr [[O]], i64 [[TMP6]] // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body: // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: for.end: -// CHECK1-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[O1]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[O]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]]) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL2]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[ARRAYIDX]], ptr align 4 [[CALL1]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -497,18 +510,21 @@ // CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -523,176 +539,178 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX1]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr 
[[ARRAYDECAY2]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX4]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY5]], i64 5 // CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX6]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY7]], i64 1 -// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]] -// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP5:%.*]] = add nuw i64 [[TMP4]], 1 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP5]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP8]] +// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP7:%.*]] = add nuw i64 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP7]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store double 0.000000e+00, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 
[[TMP11]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP12]] +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP14]] // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX9]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY10]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY12]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX14]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY15]], i64 5 // CHECK1-NEXT: [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX16]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY17]], i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX13]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX18]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX13]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 -// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr 
[[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i32 1, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP29]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP31]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP32]], 9 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX13]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[ARRAYIDX13]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = add nuw i64 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP30]], i32 1, i32 1, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP33]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP34]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP33]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP35]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP34]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP36]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP37]], [[TMP38]] // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP37]], 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP39]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK1-NEXT: store i64 [[ADD20]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP40]]) // CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP42]], i32 1) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: 
[[TMP45:%.*]] = inttoptr i64 [[TMP5]] to ptr -// CHECK1-NEXT: store ptr [[TMP45]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP47]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP48]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP42]]) +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP44]], i32 1) +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK1-NEXT: store ptr [[TMP47]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP50]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP49]] +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP51]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP50:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 -// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP50]], [[TMP51]] +// CHECK1-NEXT: [[TMP52:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 +// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP52]], [[TMP53]] // CHECK1-NEXT: store double [[ADD22]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP49]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP51]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done25: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP47]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP49]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP52]] +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP54]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]] // CHECK1: omp.arraycpy.body27: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ] -// CHECK1-NEXT: [[TMP53:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = atomicrmw fadd ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP53]] monotonic, align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = atomicrmw fadd ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP55]] monotonic, align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP52]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP54]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]] // CHECK1: omp.arraycpy.done33: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP55]]) +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP57]]) // CHECK1-NEXT: ret void // // @@ -1122,155 +1140,175 @@ // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[OUTPUT1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK2-NEXT: store i32 0, ptr [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[OUTPUT1]], i64 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +// CHECK2-NEXT: store i32 0, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP10]] +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP21]], i32 [[TMP23]], ptr [[TMP24]], ptr [[TMP25]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK2-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store ptr [[TMP36]], ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK2-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT1]], ptr [[TMP30]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP32]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP41]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP43]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP44]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP32]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP43]], ptr @.gomp_critical_user_.reduction.var) // 
CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP36]] monotonic, align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP47]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1280,119 +1318,127 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -// CHECK2-NEXT: store i32 0, ptr [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK2-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[OUTPUT3]], i64 [[TMP11]] -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 +// CHECK2-NEXT: store i32 0, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP20]] +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[_TMP3]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: br i1 
[[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 0 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT3]], ptr [[TMP31]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK2-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP40]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP42]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP43]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP42]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP37]] monotonic, align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP46]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1446,189 +1492,209 @@ // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..3, i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[OUTPUT2:%.*]] = alloca [3 x i32], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca [3 x i32], align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 2 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -// CHECK2-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[OUTPUT2]], i64 [[TMP7]] -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// 
CHECK2-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP12]] +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], 
[[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP24]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..4, i32 [[TMP22]], i32 [[TMP23]], i32 [[TMP25]], ptr [[TMP26]], ptr [[TMP27]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK2-NEXT: store ptr [[TMP36]], ptr [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store ptr [[TMP38]], ptr [[TMP37]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK2-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP42]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT2]], ptr [[TMP32]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP45]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP46]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP36]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP47]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD10]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP36]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done13: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP47]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done12: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP45]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP39]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] -// CHECK2: omp.arraycpy.body15: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 [[TMP40]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP39]] -// CHECK2-NEXT: br i1 
[[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY15]] -// CHECK2: omp.arraycpy.done21: +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP50]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK2: omp.arraycpy.body14: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[OUTPUT]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 [[TMP51]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP50]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK2: omp.arraycpy.done20: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1638,153 +1704,161 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT4:%.*]] = alloca [3 x i32], align 4 -// CHECK2-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca [3 x i32], align 4 +// CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca 
[1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]] +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 2 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_PRECOND_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[OUTPUT4]], i64 [[TMP13]] -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[_TMP5]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to 
i64 +// CHECK2-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP22]] +// CHECK2-NEXT: store ptr [[TMP23]], ptr [[_TMP4]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 [[TMP26]] -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0 -// CHECK2-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], [[TMP27]] -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX10]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 [[TMP35]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 0 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP38]], [[TMP36]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP39]], 1 +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT4]], ptr [[TMP33]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP35]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) +// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP44]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP45]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// 
CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP46]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP37]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done17: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP46]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done16: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP44]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] -// CHECK2: omp.arraycpy.body19: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK2-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 [[TMP41]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY19]] -// CHECK2: omp.arraycpy.done25: +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP49]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] +// CHECK2: omp.arraycpy.body18: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[OUTPUT]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 [[TMP50]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP49]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY18]] +// CHECK2: omp.arraycpy.done24: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1859,106 +1933,112 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined..9, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A2:%.*]] = alloca [2 x i32], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca [2 x i32], align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 1 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK2-NEXT: br i1 
[[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A2]], i64 [[TMP5]] +// CHECK2-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A2]], ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP12]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: 
[[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP14]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP18]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[A]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT3:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT3]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP14]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done6: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE4:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT3]], [[TMP18]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done5: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr 
i32, ptr [[ARRAYIDX]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP17]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK2: omp.arraycpy.body8: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP18]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP17]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK2: omp.arraycpy.done14: +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY6:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP21]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY6]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY7:%.*]] +// CHECK2: omp.arraycpy.body7: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi ptr [ [[A]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY7]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY7]] ] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 [[TMP22]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP21]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY7]] +// CHECK2: omp.arraycpy.done13: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void @@ -2000,73 +2080,79 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined..13, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 3 -// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]] -// CHECK2-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A1]], i64 [[TMP4]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 3 +// CHECK2-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr (i32, ptr 
null, i32 1) to i64) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP8]] // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP11]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP15]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP11]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP15]] monotonic, align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 
+// CHECK2-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP19]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -9,12 +9,19 @@ void bar1(void) { #pragma omp parallel // #0 // safe-remark@#0 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine. [OMP101]}} + // all-remark@#0 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // all-remark@#0 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // all-remark@#0 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#0 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } } void bar2(void) { #pragma omp parallel // #1 // safe-remark@#1 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine. [OMP101]}} + // all-remark@#1 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // all-remark@#1 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#1 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } } @@ -26,10 +33,14 @@ { baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} #pragma omp parallel // #3 + // all-remark@#3 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#3 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); #pragma omp parallel // #4 + // all-remark@#4 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#4 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } } @@ -41,11 +52,15 @@ { baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} #pragma omp parallel // #6 + // all-remark@#6 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#6 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); bar2(); #pragma omp parallel // #7 + // all-remark@#7 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#7 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); @@ -59,11 +74,15 @@ { baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} #pragma omp parallel // #9 + // all-remark@#9 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#9 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); bar2(); #pragma omp parallel // #10 + // all-remark@#10 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#10 {{Replaced globalized variable with 1 byte of shared memory. 
[OMP111]}} { } bar1(); diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -6,8 +6,11 @@ void baz(void) __attribute__((assume("omp_no_openmp"))); void bar(void) { -#pragma omp parallel // #1 \ +#pragma omp parallel // #1 // expected-remark@#1 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine. [OMP101]}} + // expected-remark@#1 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // expected-remark@#1 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} + { } } @@ -17,11 +20,17 @@ // expected-remark@#2 {{Rewriting generic-mode kernel with a customized state machine. [OMP131]}} { baz(); // expected-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} -#pragma omp parallel +#pragma omp parallel // #3 + // expected-remark@#3 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // expected-remark@#3 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} + { } bar(); -#pragma omp parallel +#pragma omp parallel // #4 + // expected-remark@#4 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // expected-remark@#4 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} + { } } diff --git a/clang/test/OpenMP/sections_firstprivate_codegen.cpp b/clang/test/OpenMP/sections_firstprivate_codegen.cpp --- a/clang/test/OpenMP/sections_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_firstprivate_codegen.cpp @@ -399,6 +399,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -407,22 +408,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -493,115 +502,114 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr 
[[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 1 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE7:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE3:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case7: -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR5]], i64 4, i1 false) +// CHECK1: .omp.sections.case3: +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN5]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP21]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] 
+// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done6: +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP26]]) // CHECK1-NEXT: ret void // // @@ -823,73 +831,75 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], 
ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 1 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 10, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 10, ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 +// CHECK3: .omp.sections.case1: +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -1045,64 +1055,69 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK4-NEXT: store i32 0, 
ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 -// CHECK4-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 1 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: // 
CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 10, ptr [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 10, ptr [[SIVAR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2: +// CHECK4: .omp.sections.case1: // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1114,25 +1129,25 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP12:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED3]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP14]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[BLOCK_CAPTURED2]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK4-NEXT: call void [[TMP17]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_lastprivate_codegen.cpp b/clang/test/OpenMP/sections_lastprivate_codegen.cpp --- a/clang/test/OpenMP/sections_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_lastprivate_codegen.cpp @@ -197,6 +197,8 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 
x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -206,24 +208,34 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -250,42 +262,40 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -295,85 +305,85 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 0 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 0 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store i32 31, ptr [[SIVAR5]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store i32 31, ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// 
CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP21]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN2]], [[TMP27]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN2]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done9: -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done4: +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) 
[[VAR]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP31]]) // CHECK1-NEXT: ret void // // @@ -388,10 +398,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -400,34 +411,36 @@ // CHECK1-NEXT: [[X:%.*]] = alloca double, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr [[X]], align 8 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: @@ -435,21 +448,21 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[INC2]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: store double [[TMP13]], ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: store double [[TMP14]], ptr @_ZN1A1xE, align 8 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -457,35 +470,44 @@ // CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -546,126 +568,125 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ 
[[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 1 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP12]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE5:%.*]] +// CHECK1-NEXT: i32 1, label 
[[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case5: -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK1: .omp.sections.case1: +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX2]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP20]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN3]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN3]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done9: -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done5: +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr 
[[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP23]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP28]]) // CHECK1-NEXT: ret void // // @@ -685,7 +706,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -698,7 +719,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -726,79 +747,81 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 13, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 13, ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK3: .omp.sections.case1: +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label 
[[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[G]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP15]], ptr @g, align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP17]], ptr @g, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -832,60 +855,65 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 17, ptr [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 17, ptr [[SIVAR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2: +// CHECK4: .omp.sections.case1: // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -897,35 +925,35 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP10:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[BLOCK_CAPTURED3]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK4-NEXT: call void [[TMP13]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED2]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: 
[[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK4-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP17]], ptr @g, align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP18]], ptr [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP19]], ptr @g, align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], ptr [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // @@ -952,6 +980,8 @@ // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -961,24 +991,34 @@ // CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK5-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done1: +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done2: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP1]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP6]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1005,42 +1045,40 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1050,85 +1088,85 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 0 -// CHECK5-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 0 +// CHECK5-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 0 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: br i1 [[CMP]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: store i32 31, ptr [[SIVAR5]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store i32 31, ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK5-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// 
CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP21]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN2]], [[TMP27]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN2]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP22]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: +// CHECK5-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) 
[[VAR]]) +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: -// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done7: +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP31]]) // CHECK5-NEXT: ret void // // @@ -1143,10 +1181,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1155,74 +1194,76 @@ // CHECK5-NEXT: [[X:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 1 -// CHECK5-NEXT: store i8 0, ptr [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 -// CHECK5-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 1 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 1 +// CHECK5-NEXT: store i8 0, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // 
CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] -// CHECK5-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK5-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = icmp sle i32 [[TMP14]], [[TMP13]] +// CHECK5-NEXT: br i1 [[TMP15]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK5: lp_cond_then: -// CHECK5-NEXT: store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK5-NEXT: store double [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 8 +// CHECK5-NEXT: store i32 [[TMP13]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK5-NEXT: store double [[TMP16]], ptr @{{pl_cond[.].+[.|,]}} align 8 // CHECK5-NEXT: br label [[LP_COND_EXIT]] // CHECK5: lp_cond_exit: -// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.case1: // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: store i32 [[INC2]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP3]]) -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP4]]) +// 
CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP19:%.*]] = load double, ptr @{{pl_cond[.].+[.|,]}} align 8 -// CHECK5-NEXT: store double [[TMP19]], ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK5-NEXT: store double [[TMP20]], ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load double, ptr @{{pl_cond[.].+[.|,]}} align 8 +// CHECK5-NEXT: store double [[TMP20]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK5-NEXT: store double [[TMP21]], ptr @_ZN1A1xE, align 8 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // @@ -1230,35 +1271,44 @@ // CHECK5-SAME: () #[[ATTR7:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done1: // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP1]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP5]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1319,126 +1369,125 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], 
[[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 -// CHECK5-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 1 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP12]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE5:%.*]] +// CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // 
CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK5: .omp.sections.case5: -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK5: .omp.sections.case1: +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX2]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP20]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], 
align 4 +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN3]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN3]], [[TMP25]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP20]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done5: +// CHECK5-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP23]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: +// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP28]]) // CHECK5-NEXT: ret void // // @@ -1458,7 +1507,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -1471,7 +1520,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/sections_private_codegen.cpp b/clang/test/OpenMP/sections_private_codegen.cpp --- a/clang/test/OpenMP/sections_private_codegen.cpp +++ b/clang/test/OpenMP/sections_private_codegen.cpp @@ -113,6 +113,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] 
= alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -122,7 +123,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] @@ -165,10 +166,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -181,6 +183,8 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 @@ -196,30 +200,30 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 @@ -227,28 +231,28 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP14]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -271,6 +275,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -279,7 +284,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 @@ -355,10 +360,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -370,6 +376,8 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 @@ -385,31 +393,31 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: // CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 @@ -418,28 +426,28 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN3]], i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN3]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP14]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done4: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -498,10 +506,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -512,28 +521,30 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 
1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] @@ -542,22 +553,22 @@ // CHECK3-NEXT: store i32 11, ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case1: -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP11]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -576,17 +587,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: 
[[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -597,28 +610,30 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, double, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] @@ -638,25 +653,25 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], ptr [[BLOCK_CAPTURED2]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK4-NEXT: call void [[TMP12]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[BLOCK_CAPTURED2]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK4-NEXT: call void [[TMP13]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) 
+// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
+// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
 // CHECK4-NEXT: ret void
 //
 //
diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp
--- a/clang/test/OpenMP/sections_reduction_codegen.cpp
+++ b/clang/test/OpenMP/sections_reduction_codegen.cpp
@@ -192,6 +192,7 @@
 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4
 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4
 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]])
 // CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4
@@ -202,24 +203,36 @@
 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00)
 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00)
 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]])
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 6, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4
+// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5
+// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
 // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4:[0-9]+]]
 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2
 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
 // CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
 // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
 // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
 // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
 // CHECK1: arraydestroy.done1:
 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK1-NEXT: ret i32 [[TMP1]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4
+// CHECK1-NEXT: ret i32 [[TMP7]]
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev
@@ -246,197 +259,194 @@
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP11:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 0 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 0 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 0 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP15]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP22]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR3]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: store i32 [[CONV7]], ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR14]], i64 4, i1 false) +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP12]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi float [[TMP23]] to i32 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP12]], i64 0, i64 1 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR1]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store float [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL10]], 0.000000e+00 +// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP30]], [[TMP31]] +// CHECK1-NEXT: store float [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr 
@_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL5]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL12:%.*]] = fcmp une float [[CALL11]], 0.000000e+00 +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL7:%.*]] = fcmp une float [[CALL6]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP25:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL12]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV13:%.*]] = uitofp i1 [[TMP25]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP32:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV8:%.*]] = uitofp i1 [[TMP32]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV8]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP14:%.*]] = fcmp olt float [[TMP26]], [[TMP27]] -// CHECK1-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = fcmp olt float [[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP35]], [[COND_TRUE]] ], [ [[TMP36]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP30]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL15]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]] -// CHECK1: land.rhs19: -// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END22]] -// CHECK1: land.end22: -// CHECK1-NEXT: [[TMP32:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ] -// CHECK1-NEXT: [[CONV23:%.*]] = uitofp i1 [[TMP32]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP16]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP3]] monotonic, align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = atomicrmw fadd ptr [[TMP2]], float [[TMP37]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL10]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL13:%.*]] = fcmp une float [[CALL12]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END17:%.*]] +// CHECK1: land.rhs14: +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL16:%.*]] = fcmp une float [[CALL15]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END17]] +// CHECK1: land.end17: +// CHECK1-NEXT: [[TMP39:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL16]], [[LAND_RHS14]] ] +// CHECK1-NEXT: [[CONV18:%.*]] = uitofp i1 [[TMP39]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]], float noundef [[CONV18]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP11]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP8]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP42:%.*]], [[COND_END27:%.*]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32 [[TMP34]] to float -// CHECK1-NEXT: store float [[TMP35]], ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP24:%.*]] = fcmp olt float [[TMP36]], [[TMP37]] -// CHECK1-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]] -// CHECK1: cond.true25: -// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: br label [[COND_END27]] -// CHECK1: cond.false26: -// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END27]] -// CHECK1: cond.end27: -// CHECK1-NEXT: [[COND28:%.*]] = phi float [ [[TMP38]], [[COND_TRUE25]] ], [ [[TMP39]], [[COND_FALSE26]] ] -// CHECK1-NEXT: store float [[COND28]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = cmpxchg ptr [[TMP3]], i32 [[TMP34]], i32 [[TMP40]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP42]] = extractvalue { i32, i1 } [[TMP41]], 0 -// CHECK1-NEXT: [[TMP43:%.*]] = extractvalue { i32, i1 } [[TMP41]], 1 -// CHECK1-NEXT: br i1 [[TMP43]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END17]] ], [ [[TMP49:%.*]], [[COND_END22:%.*]] ] +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i32 [[TMP41]] to float +// CHECK1-NEXT: store float [[TMP42]], ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP19:%.*]] = fcmp olt float [[TMP43]], [[TMP44]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] +// CHECK1: cond.true20: +// CHECK1-NEXT: [[TMP45:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END22]] +// CHECK1: cond.false21: +// CHECK1-NEXT: [[TMP46:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END22]] +// CHECK1: cond.end22: +// CHECK1-NEXT: [[COND23:%.*]] = phi float [ [[TMP45]], [[COND_TRUE20]] ], [ [[TMP46]], [[COND_FALSE21]] ] +// CHECK1-NEXT: store float [[COND23]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr 
[[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = cmpxchg ptr [[TMP8]], i32 [[TMP41]], i32 [[TMP47]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP49]] = extractvalue { i32, i1 } [[TMP48]], 0 +// CHECK1-NEXT: [[TMP50:%.*]] = extractvalue { i32, i1 } [[TMP48]], 1 +// CHECK1-NEXT: br i1 [[TMP50]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP14]]) // CHECK1-NEXT: ret void // // @@ -543,6 +553,7 @@ // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -552,23 +563,35 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, ptr [[T_VAR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -635,165 +658,162 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 
dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP8:%.*]] = alloca [[STRUCT_S_0]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = 
load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 1 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 1 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 
[[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE6:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case6: -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR3]], i64 4, i1 false) +// CHECK1: .omp.sections.case1: +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP12]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP7]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP14]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL8]], 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[CALL9]], 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL5:%.*]] = icmp ne i32 [[CALL4]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP24:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL10]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP24]] to i32 +// CHECK1-NEXT: [[TMP31:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL5]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP31]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 
4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP27]], [[COND_TRUE]] ], [ [[TMP28]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP34]], [[COND_TRUE]] ], [ [[TMP35]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP29]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL12]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = icmp ne i32 [[CALL14]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]] -// CHECK1: land.rhs16: -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL18:%.*]] = icmp ne i32 [[CALL17]], 0 -// CHECK1-NEXT: br label [[LAND_END19]] -// CHECK1: land.end19: -// CHECK1-NEXT: [[TMP31:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ] -// 
CHECK1-NEXT: [[CONV20:%.*]] = zext i1 [[TMP31]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], i32 noundef [[CONV20]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP13]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw min ptr [[TMP3]], i32 [[TMP32]] monotonic, align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP36]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL7]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[CALL9]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL10]], label [[LAND_RHS11:%.*]], label [[LAND_END14:%.*]] +// CHECK1: land.rhs11: +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[CALL12]], 0 +// CHECK1-NEXT: br label [[LAND_END14]] +// CHECK1: land.end14: +// CHECK1-NEXT: [[TMP38:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL13]], [[LAND_RHS11]] ] +// CHECK1-NEXT: [[CONV15:%.*]] = zext i1 [[TMP38]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP8]], i32 noundef [[CONV15]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP8]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP8]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = atomicrmw min ptr [[TMP8]], i32 [[TMP39]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -939,10 +959,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -953,29 +974,31 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: store double 0.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle 
i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] @@ -983,40 +1006,40 @@ // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case1: -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr @g, align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP14:%.*]] = load double, ptr @g, align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP14]], [[TMP15]] // CHECK3-NEXT: store double [[ADD]], ptr @g, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr 
@[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw fadd ptr @g, double [[TMP15]] monotonic, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw fadd ptr @g, double [[TMP16]] monotonic, align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -1055,17 +1078,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1076,29 +1101,31 @@ // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK4-NEXT: store double 0.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp 
sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] @@ -1117,42 +1144,42 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK4-NEXT: call void [[TMP11]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: call void [[TMP12]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[G]], ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 
2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP15:%.*]] = load double, ptr @g, align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP15]], [[TMP16]] +// CHECK4-NEXT: [[TMP16:%.*]] = load double, ptr @g, align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] // CHECK4-NEXT: store double [[ADD]], ptr @g, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP17:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = atomicrmw fadd ptr @g, double [[TMP17]] monotonic, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP18:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = atomicrmw fadd ptr @g, double [[TMP18]] monotonic, align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -41,275 +41,281 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 9 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = add nuw i64 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP12]], align 16 -// CHECK1-NEXT: store i64 [[TMP12]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP15]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP9]], i64 9 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = add nuw i64 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP15]], align 16 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr 
i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP21]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP24]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 0 -// 
CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = sext i32 [[TMP33]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP34]] -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 9 -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = sub i64 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = sdiv exact i64 [[TMP40]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP42:%.*]] = add nuw i64 [[TMP41]], 1 -// CHECK1-NEXT: [[TMP43:%.*]] = mul nuw i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP43]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP50]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP51]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP38]], i64 9 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP43:%.*]] = sub i64 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[TMP44:%.*]] = sdiv exact i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP45:%.*]] = add nuw i64 [[TMP44]], 1 +// CHECK1-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP46]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP51]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP53]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr 
[[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = icmp slt i32 [[TMP54]], 0 -// CHECK1-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 0 -// CHECK1-NEXT: store i32 [[TMP56]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP57]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = icmp slt i32 [[TMP57]], 0 +// CHECK1-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[TMP57]], i32 0 +// CHECK1-NEXT: store i32 [[TMP59]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP60]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP58]], [[TMP59]] +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP61]], [[TMP62]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP60]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP63]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP66]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP68]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[TMP69]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP70]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP67]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP75]], ptr [[TMP67]]) +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP67]], ptr [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 +// CHECK1-NEXT: [[TMP70:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP69]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP71]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[TMP72]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP73]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP70]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP76]], ptr [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 +// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP78]], ptr [[TMP70]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP77]], 1 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP80]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP79]]) -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP81]], i32 1) -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr [[TMP85]], ptr [[TMP84]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP82]]) +// CHECK1-NEXT: [[TMP83:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP84]], i32 1) +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP88:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK1-NEXT: store ptr [[TMP88]], ptr [[TMP87]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4 +// CHECK1-NEXT: [[TMP91:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP90]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP91]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP89]], [[TMP90]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP92]], [[TMP93]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP93]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP95:%.*]] = 
load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP96]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP87]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP90]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP94]] monotonic, align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP97]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP97]] monotonic, align 4 +// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP99]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP100]] to i32 
+// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP98:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP103:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP98]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP102:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP98]], i8 [[TMP101]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP103]] = extractvalue { i8, i1 } [[TMP102]], 0 -// CHECK1-NEXT: [[TMP104:%.*]] = extractvalue { i8, i1 } [[TMP102]], 1 -// CHECK1-NEXT: br i1 [[TMP104]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP101:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP106:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP101]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP103]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP105:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP101]], i8 [[TMP104]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP106]] = extractvalue { i8, i1 } [[TMP105]], 0 +// CHECK1-NEXT: [[TMP107:%.*]] = extractvalue { i8, i1 } [[TMP105]], 1 +// CHECK1-NEXT: br i1 [[TMP107]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP87]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP99]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP90]], ptr 
@.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP105]]) -// CHECK1-NEXT: [[TMP106:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP106]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP107]]) +// CHECK1-NEXT: [[TMP108:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP108]]) +// CHECK1-NEXT: [[TMP109:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP109]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP110]]) // CHECK1-NEXT: ret void // // @@ -448,20 +454,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -475,7 +481,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: 
[[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -488,28 +488,30 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store double 0.000000e+00, ptr [[A]], align 8 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[THIS1]], i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 @@ -517,42 +519,46 @@ // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// 
CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK1-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP15]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP13:%.*]] = extractvalue { ptr, i32 } [[TMP12]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -597,7 +603,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -607,54 +613,60 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load 
i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, ptr [[TMP1]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 
[[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP14]]) // CHECK1-NEXT: ret void // // @@ -684,9 +696,7 @@ // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -711,90 +721,100 @@ // CHECK1-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], 
align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..8, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr 
[[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// 
CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 // CHECK1-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP26]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 
0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -802,44 +822,44 @@ // CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_3:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 // CHECK1-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // @@ -874,14 +894,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -890,49 +910,57 @@ // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// 
CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK1-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4 +// 
CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP25]]) // CHECK1-NEXT: ret void // // @@ -969,36 +997,40 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK1-SAME: () #[[ATTR10]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -1370,36 +1402,40 @@ // CHECK2-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK2-SAME: () #[[ATTR10]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: // CHECK2-NEXT: invoke void @_Z3foov() // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1431,9 +1467,7 @@ // CHECK2-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1458,30 +1492,32 @@ // CHECK2-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 -// CHECK2-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1491,57 +1527,65 @@ // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = 
load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 // CHECK2-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// 
CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.6, i32 [[TMP18]]) +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.6, i32 [[TMP26]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1549,9 +1593,7 @@ // CHECK2-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1571,22 +1613,24 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK2-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 -// 
CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..7, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // @@ -1621,14 +1665,14 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1637,49 +1681,57 @@ // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// 
CHECK2-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK2-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK2-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4 +// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK2-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.8, i32 [[TMP17]]) +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 
0, i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.8, i32 [[TMP25]]) // CHECK2-NEXT: ret void // // @@ -1718,71 +1770,77 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: store double 0.000000e+00, ptr [[A]], align 8 // CHECK2-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK2-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..9, ptr [[THIS1]], i64 [[TMP2]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK2-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK2-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK2-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.10, i32 [[TMP11]]) +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.10, i32 [[TMP15]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: [[TMP13:%.*]] = extractvalue { ptr, i32 } [[TMP12]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1790,15 +1848,15 @@ // CHECK2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr 
[[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP3]], align 8 // CHECK2-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) @@ -1827,64 +1885,70 @@ // CHECK2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_5:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 // CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK2-NEXT: store double [[INC]], ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK2-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, ptr [[TMP1]], i64 [[TMP8]]) +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK2-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK2-NEXT: store double [[INC]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], 
i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK2-NEXT: store double [[INC]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.12, i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.12, i32 [[TMP14]]) // CHECK2-NEXT: ret void // // @@ -2262,36 +2326,40 @@ // CHECK4-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK4-SAME: () #[[ATTR10]] { // CHECK4-NEXT: entry: -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: invoke void @_Z3foov() // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK4-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK4-NEXT: catch ptr null -// CHECK4-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2323,9 +2391,7 @@ // CHECK4-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2350,30 +2416,32 @@ // CHECK4-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 -// CHECK4-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2383,57 +2451,65 @@ // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = 
load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK4-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 // CHECK4-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// 
CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP18]]) +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP26]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } +// CHECK4-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } // CHECK4-NEXT: catch ptr null -// CHECK4-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2441,9 +2517,7 @@ // CHECK4-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -2463,22 +2537,24 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 -// 
CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK4-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..6, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // @@ -2513,14 +2589,14 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2529,49 +2605,57 @@ // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// 
CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK4-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP17]]) +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 
0, i64 0 +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP22]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP25]]) // CHECK4-NEXT: ret void // // @@ -2610,71 +2694,77 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: store double 0.000000e+00, ptr [[A]], align 8 // CHECK4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK4-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..8, ptr [[THIS1]], i64 [[TMP2]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK4-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 
-// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK4-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP11]]) +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP15]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK4-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK4-NEXT: catch ptr null -// CHECK4-NEXT: [[TMP13:%.*]] = extractvalue { ptr, i32 } [[TMP12]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2682,15 +2772,15 @@ // CHECK4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr 
[[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[THIS1]], i32 0, i32 1 // CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK4-NEXT: store ptr [[TMP5]], ptr [[TMP3]], align 8 // CHECK4-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) @@ -2719,64 +2809,70 @@ // CHECK4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_5:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK4-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 // CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK4-NEXT: store double [[INC]], ptr [[TMP3]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK4-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP8]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK4-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK4-NEXT: store double [[INC]], ptr [[TMP6]], align 8 -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], 
i32 [[TMP3]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK4-NEXT: store double [[INC]], ptr [[TMP10]], align 8 +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP10]]) +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP14]]) // CHECK4-NEXT: ret void // // @@ -3186,28 +3282,30 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG83:![0-9]+]] // CHECK5-NEXT: store double 0.000000e+00, ptr [[A]], align 8, !dbg [[DBG83]] // CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG84:![0-9]+]] // CHECK5-NEXT: store ptr [[A3]], ptr [[A2]], align 8, !dbg [[DBG84]] -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG85:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !dbg [[DBG86:![0-9]+]] -// CHECK5-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8, !dbg [[DBG86]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG86]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[THIS1]], i64 [[TMP2]]), !dbg [[DBG86]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG85:![0-9]+]] +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8, !dbg [[DBG85]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG85]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG86:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !dbg [[DBG86]] +// CHECK5-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8, !dbg [[DBG85]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB18:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG85]] // CHECK5-NEXT: ret void, !dbg [[DBG87:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG88:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG88:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 @@ -3215,42 +3313,46 @@ // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8, !dbg [[DBG89:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG89]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG90:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8, !dbg [[DBG91:![0-9]+]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG89:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: store double [[TMP4]], ptr [[A]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8, !dbg [[DBG91:![0-9]+]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB16:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG91]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 
4, !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB16:[0-9]+]], i32 [[TMP7]]), !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0, !dbg [[DBG91]] +// CHECK5-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG91]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG92:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG93:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG92:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG93:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG92]] // CHECK5-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG92]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB16]], i32 [[TMP3]]), !dbg [[DBG92]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB16]], i32 [[TMP7]]), !dbg [[DBG92]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] // CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG92]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG94:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB16]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP11]]), !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG94:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB16]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP15]]), !dbg [[DBG92]] // CHECK5-NEXT: ret void, !dbg [[DBG95:![0-9]+]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP13:%.*]] = extractvalue { ptr, i32 } [[TMP12]], 0, !dbg [[DBG92]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]], !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0, 
!dbg [[DBG92]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]], !dbg [[DBG92]] // CHECK5-NEXT: unreachable, !dbg [[DBG92]] // // @@ -3295,7 +3397,7 @@ // CHECK5-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG105:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -3305,54 +3407,60 @@ // CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8, !dbg [[DBG107:![0-9]+]] // CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00, !dbg [[DBG107]] // CHECK5-NEXT: store double [[INC]], ptr [[TMP3]], align 8, !dbg [[DBG107]] -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG108:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8, !dbg [[DBG109:![0-9]+]] -// CHECK5-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB22:[0-9]+]], i32 2, ptr @.omp_outlined..6, ptr [[TMP1]], i64 [[TMP8]]), !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG108:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8, !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG109:![0-9]+]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8, !dbg [[DBG108]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB22:[0-9]+]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG108]] // CHECK5-NEXT: ret void, !dbg [[DBG110:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] !dbg [[DBG111:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] !dbg [[DBG111:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG112]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG113:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8, !dbg [[DBG114:![0-9]+]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: store double [[TMP4]], ptr [[A]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG113:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8, !dbg [[DBG114:![0-9]+]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB20:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG114]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr 
@[[GLOB20:[0-9]+]], i32 [[TMP7]]), !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0, !dbg [[DBG114]] +// CHECK5-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG114]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG113]] -// CHECK5-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8, !dbg [[DBG115:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00, !dbg [[DBG115]] -// CHECK5-NEXT: store double [[INC]], ptr [[TMP6]], align 8, !dbg [[DBG115]] -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB20]], i32 [[TMP3]]), !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG113]] +// CHECK5-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8, !dbg [[DBG115:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00, !dbg [[DBG115]] +// CHECK5-NEXT: store double [[INC]], ptr [[TMP10]], align 8, !dbg [[DBG115]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB20]], i32 [[TMP7]]), !dbg [[DBG115]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] // CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG115]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG115]] -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG116:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8, !dbg [[DBG115]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB20]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP10]]), !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG116:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB20]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP14]]), !dbg [[DBG115]] // CHECK5-NEXT: ret void, !dbg [[DBG117:![0-9]+]] // // @@ -3382,9 +3490,7 @@ // CHECK5-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -3409,177 +3515,187 @@ // CHECK5-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG128:![0-9]+]] // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8, !dbg [[DBG128]] // CHECK5-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8, !dbg [[DBG128]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG129:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[TMP2]], align 4, !dbg [[DBG130:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8, !dbg [[DBG131:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB26:[0-9]+]], i32 4, ptr @.omp_outlined..8, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]), !dbg [[DBG130]] -// CHECK5-NEXT: ret void, !dbg [[DBG132:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG130:![0-9]+]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG130]] +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4, !dbg [[DBG131:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8, !dbg [[DBG132:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG132]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB26:[0-9]+]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG129]] +// CHECK5-NEXT: ret void, !dbg [[DBG133:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG133:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG134:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8, !dbg [[DBG134:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG134]] -// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8, !dbg [[DBG134]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG135:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8, !dbg [[DBG136:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG137:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8, !dbg [[DBG136]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB24:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG136]] -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG136]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] +// CHECK5-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[B]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[C]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG136:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8, !dbg [[DBG137:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG138:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG137]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB24:[0-9]+]], i32 [[TMP12]]), !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0, !dbg [[DBG137]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG137]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG138:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG139:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2, !dbg [[DBG138]] -// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG139]] -// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG138]] +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG139:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG140:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG139]] +// 
CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2, !dbg [[DBG139]] +// CHECK5-NEXT: store ptr [[B]], ptr [[TMP18]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG140]] +// CHECK5-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8, !dbg [[DBG139]] // CHECK5-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG138]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG139]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB24]], i32 [[TMP4]]), !dbg [[DBG138]] -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG138]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG138]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB24]], i32 [[TMP12]]), !dbg [[DBG139]] +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG139]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG139]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG140:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG138]] -// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG141:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG138]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB24]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP18]]), !dbg [[DBG138]] -// CHECK5-NEXT: ret void, !dbg [[DBG142:![0-9]+]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG141:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG139]] +// CHECK5-NEXT: store ptr [[B]], ptr [[TMP23]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG142:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG139]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB24]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr 
@.omp.copyprivate.copy_func.9, i32 [[TMP26]]), !dbg [[DBG139]] +// CHECK5-NEXT: ret void, !dbg [[DBG143:![0-9]+]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } -// CHECK5-NEXT: catch ptr null, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0, !dbg [[DBG138]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]], !dbg [[DBG138]] -// CHECK5-NEXT: unreachable, !dbg [[DBG138]] +// CHECK5-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: catch ptr null, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 0, !dbg [[DBG139]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]], !dbg [[DBG139]] +// CHECK5-NEXT: unreachable, !dbg [[DBG139]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv -// CHECK5-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG143:![0-9]+]] { +// CHECK5-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG144:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_3:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG144:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG144]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG145:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG145]] -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4, !dbg [[DBG145]] -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG146:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG146]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG147:![0-9]+]] -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1, !dbg [[DBG147]] -// CHECK5-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4, !dbg [[DBG147]] -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG148:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG148]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1, !dbg [[DBG149]] -// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4, !dbg [[DBG149]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG150:![0-9]+]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG150]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG151:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], 
align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG152:![0-9]+]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8, !dbg [[DBG152]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG153:![0-9]+]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8, !dbg [[DBG153]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB30:[0-9]+]], i32 4, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]), !dbg [[DBG151]] -// CHECK5-NEXT: ret void, !dbg [[DBG154:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG145:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG145]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG146:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG146]] +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4, !dbg [[DBG146]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG147:![0-9]+]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG147]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG148:![0-9]+]] +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1, !dbg [[DBG148]] +// CHECK5-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4, !dbg [[DBG148]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG149:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG149]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1, !dbg [[DBG150]] +// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4, !dbg [[DBG150]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG151:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG152:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8, !dbg [[DBG152]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG152]] +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr 
[[THIS1]], i32 0, i32 2, !dbg [[DBG153:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8, !dbg [[DBG153]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !dbg [[DBG153]] +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG154:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8, !dbg [[DBG154]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !dbg [[DBG154]] +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB30:[0-9]+]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG151]] +// CHECK5-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9 -// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG155:![0-9]+]] { +// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG156:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG156:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG157:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG157]] -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG158:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG158]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG159:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG159]] -// CHECK5-NEXT: ret void, !dbg [[DBG159]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG157:![0-9]+]] +// CHECK5-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG158:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG158]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG159:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG159]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG160:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG160]] +// CHECK5-NEXT: ret void, !dbg [[DBG160]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] !dbg [[DBG160:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] !dbg [[DBG161:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -3588,125 +3704,137 @@ // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], 
align 8, !dbg [[DBG161:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG161]] -// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8, !dbg [[DBG161]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG162:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8, !dbg [[DBG163:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG164:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8, !dbg [[DBG163]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB28:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG163]] -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG163]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG162:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[B]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[C]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG163:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8, !dbg [[DBG164:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG165:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG164]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB28:[0-9]+]], i32 [[TMP12]]), !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0, !dbg [[DBG164]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG164]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG162]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG165:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = add 
nsw i32 [[TMP8]], 1, !dbg [[DBG165]] -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4, !dbg [[DBG166:![0-9]+]] -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1, !dbg [[DBG166]] -// CHECK5-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4, !dbg [[DBG166]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG164]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !dbg [[DBG167:![0-9]+]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1, !dbg [[DBG167]] -// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4, !dbg [[DBG167]] -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB28]], i32 [[TMP4]]), !dbg [[DBG165]] -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG165]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG163]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !dbg [[DBG166:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1, !dbg [[DBG166]] +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4, !dbg [[DBG167:![0-9]+]] +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1, !dbg [[DBG167]] +// CHECK5-NEXT: store i32 [[DEC]], ptr [[B]], align 4, !dbg [[DBG167]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG165]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !dbg [[DBG168:![0-9]+]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1, !dbg [[DBG168]] +// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4, !dbg [[DBG168]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB28]], i32 [[TMP12]]), !dbg [[DBG166]] +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG166]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG168:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG165]] -// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG169:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB28]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP17]]), !dbg [[DBG165]] -// CHECK5-NEXT: ret void, !dbg [[DBG170:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG169:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP22:%.*]] 
= getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG166]] +// CHECK5-NEXT: store ptr [[B]], ptr [[TMP22]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG170:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB28]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP25]]), !dbg [[DBG166]] +// CHECK5-NEXT: ret void, !dbg [[DBG171:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11 -// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG171:![0-9]+]] { +// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG172:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG172:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG173:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG173]] -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG174:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG174]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG175:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG175]] -// CHECK5-NEXT: ret void, !dbg [[DBG175]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG173:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG173]] +// CHECK5-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG174:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG174]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG175:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG175]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG176:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG176]] +// CHECK5-NEXT: ret void, !dbg [[DBG176]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z15parallel_singlev -// CHECK5-SAME: () #[[ATTR10]] !dbg [[DBG176:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR10]] !dbg [[DBG177:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB35:[0-9]+]], i32 0, ptr @.omp_outlined..12), !dbg [[DBG177:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG178:![0-9]+]] +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB35:[0-9]+]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG178:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG179:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG179:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG180:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG180:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG180]] -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB32:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG180]] -// CHECK5-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG180]] -// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG180]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG182:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG182]] +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB32:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG182]] +// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0, !dbg [[DBG182]] +// CHECK5-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG182]] // CHECK5: omp_if.then: // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG181:![0-9]+]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG183:![0-9]+]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB32]], i32 [[TMP1]]), !dbg [[DBG181]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG181]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB32]], i32 [[TMP2]]), !dbg [[DBG183]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG183]] // CHECK5: omp_if.end: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB33:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG182:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG182]] +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB33:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG184:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG184]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK5-NEXT: catch ptr null, !dbg [[DBG181]] -// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG181]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]], !dbg [[DBG181]] -// CHECK5-NEXT: unreachable, !dbg [[DBG181]] +// CHECK5-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: catch ptr null, !dbg [[DBG183]] +// CHECK5-NEXT: [[TMP6:%.*]] 
= extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG183]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]], !dbg [[DBG183]] +// CHECK5-NEXT: unreachable, !dbg [[DBG183]] // // // CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp -// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG183:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG185:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG184:![0-9]+]] -// CHECK5-NEXT: call void @__cxx_global_var_init.4(), !dbg [[DBG184]] -// CHECK5-NEXT: call void @.__omp_threadprivate_init_.(), !dbg [[DBG184]] -// CHECK5-NEXT: call void @.__omp_threadprivate_init_..3(), !dbg [[DBG184]] +// CHECK5-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG186:![0-9]+]] +// CHECK5-NEXT: call void @__cxx_global_var_init.4(), !dbg [[DBG186]] +// CHECK5-NEXT: call void @.__omp_threadprivate_init_.(), !dbg [[DBG186]] +// CHECK5-NEXT: call void @.__omp_threadprivate_init_..3(), !dbg [[DBG186]] // CHECK5-NEXT: ret void // // diff --git a/clang/test/OpenMP/single_firstprivate_codegen.cpp b/clang/test/OpenMP/single_firstprivate_codegen.cpp --- a/clang/test/OpenMP/single_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/single_firstprivate_codegen.cpp @@ -354,6 +354,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -362,22 +363,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -448,77 +457,76 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP10]], ptr 
[[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -740,39 +748,41 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 17, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: store i32 17, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP10]], align 8 // CHECK3-NEXT: call void 
@"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) -// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -928,37 +938,42 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 
[[TMP5]], 0 +// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[SIVAR1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[SIVAR]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 37, ptr [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 37, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -970,18 +985,18 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP7:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP7]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[BLOCK_CAPTURED2]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: call void [[TMP10]](ptr [[BLOCK]]) -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP9:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: call void [[TMP12]](ptr [[BLOCK]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/single_private_codegen.cpp b/clang/test/OpenMP/single_private_codegen.cpp --- a/clang/test/OpenMP/single_private_codegen.cpp +++ b/clang/test/OpenMP/single_private_codegen.cpp @@ -100,6 +100,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 
4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -109,7 +110,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] @@ -152,10 +153,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -163,11 +165,13 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 @@ -180,27 +184,27 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: store i32 303, ptr [[SIVAR]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -223,6 +227,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -231,7 +236,7 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 @@ -307,21 +312,24 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 @@ -334,26 +342,26 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -412,32 +420,35 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 // CHECK3-NEXT: store i32 101, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP6]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) -// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -456,27 +467,31 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, double, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: store 
double 1.000000e+00, ptr [[G]], align 8 // CHECK4-NEXT: store i32 101, ptr [[SIVAR]], align 4 @@ -491,18 +506,18 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP4]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP5]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[BLOCK_CAPTURED1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK4-NEXT: call void [[TMP7]](ptr noundef [[BLOCK]]) -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[BLOCK_CAPTURED1]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: call void [[TMP8]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -363,6 +363,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -388,27 +389,33 @@ // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float // CHECK1-NEXT: store float [[CONV5]], ptr [[SB_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[GC_ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[GD_ADDR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SD_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[GC_ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 // CHECK1-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true8: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined., ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -416,34 +423,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2
-// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
-// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4
-// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double
+// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00
+// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double
// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00
// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float
-// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4
+// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -454,46 +461,52 @@
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK1-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK1-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK1-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK1-NEXT: 
[[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK1-NEXT: ret i32 [[ADD13]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8 @@ -509,147 +522,149 @@ // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr @Gb, align 8 -// CHECK1-NEXT: store double [[TMP6]], ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK1-NEXT: store float [[TMP8]], ptr [[SB_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[SB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr @Gc, align 8 -// CHECK1-NEXT: store double [[TMP10]], ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK1-NEXT: store i16 [[TMP12]], ptr [[C_CASTED]], align 2 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK1-NEXT: store float [[TMP14]], ptr [[SC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr 
[[SC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK1-NEXT: store i16 [[TMP16]], ptr [[D_CASTED]], align 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr @Gd, align 8 -// CHECK1-NEXT: store double [[TMP18]], ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK1-NEXT: store float [[TMP20]], ptr [[SD_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[SD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr @Ga, align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr @Gb, align 8 +// CHECK1-NEXT: store double [[TMP11]], ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK1-NEXT: store float [[TMP13]], ptr [[SB_CASTED]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[SB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double, ptr @Gc, align 8 +// CHECK1-NEXT: store double [[TMP15]], ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP6]], align 2 +// CHECK1-NEXT: store i16 [[TMP17]], ptr [[C_CASTED]], align 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[C_CASTED]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK1-NEXT: store float [[TMP19]], ptr [[SC_CASTED]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[SC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK1-NEXT: store i16 [[TMP21]], ptr [[D_CASTED]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[D_CASTED]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr @Gd, align 8 +// CHECK1-NEXT: store double [[TMP23]], ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK1-NEXT: store float [[TMP25]], ptr [[SD_CASTED]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[SD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr @Ga, align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP27]], 0.000000e+00 // 
CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK1-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true2: -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP24]] to double +// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP29]] to double // CHECK1-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 2, ptr [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 9, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP65]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP66]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP68:%.*]] = icmp ne i32 [[TMP67]], 0 -// CHECK1-NEXT: br i1 [[TMP68]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP42]], align 8 +// CHECK1-NEXT: 
[[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP59]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 9, ptr [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP69]], align 4 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP70]], align 4 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP71]], align 4 +// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP73:%.*]] = icmp ne i32 [[TMP72]], 0 +// CHECK1-NEXT: br i1 [[TMP73]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -667,6 +682,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -692,27 +708,33 @@ // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double 
[[ADD4]] to float // CHECK1-NEXT: store float [[CONV5]], ptr [[SB_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[GC_ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[GD_ADDR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SD_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[GC_ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 // CHECK1-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true8: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..2, ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
@@ -720,34 +742,34 @@
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2
-// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00
-// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8
-//
CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -777,46 +799,52 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK1-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK1-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK1-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK1-NEXT: ret i32 [[ADD13]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, 
align 8 // CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8 @@ -832,147 +860,149 @@ // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr @Gb, align 8 -// CHECK1-NEXT: store double [[TMP6]], ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK1-NEXT: store float [[TMP8]], ptr [[SB_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[SB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr @Gc, align 8 -// CHECK1-NEXT: store double [[TMP10]], ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK1-NEXT: store i16 [[TMP12]], ptr [[C_CASTED]], align 2 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK1-NEXT: store float [[TMP14]], ptr [[SC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[SC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK1-NEXT: store i16 [[TMP16]], ptr [[D_CASTED]], align 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr @Gd, align 8 -// CHECK1-NEXT: store double [[TMP18]], ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK1-NEXT: store float [[TMP20]], ptr [[SD_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[SD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr @Ga, align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr @Gb, align 8 +// CHECK1-NEXT: store double [[TMP11]], ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK1-NEXT: store float [[TMP13]], ptr [[SB_CASTED]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[SB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double, ptr @Gc, align 8 +// CHECK1-NEXT: store double [[TMP15]], ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP6]], align 2 +// CHECK1-NEXT: store i16 [[TMP17]], ptr [[C_CASTED]], align 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[C_CASTED]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK1-NEXT: store float [[TMP19]], ptr [[SC_CASTED]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[SC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK1-NEXT: store i16 [[TMP21]], ptr [[D_CASTED]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[D_CASTED]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr @Gd, align 8 +// CHECK1-NEXT: store double [[TMP23]], ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK1-NEXT: store float [[TMP25]], ptr [[SD_CASTED]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[SD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr @Ga, align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP27]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK1-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true2: -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP24]] to double +// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP29]] to double // CHECK1-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], 
ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 -// CHECK1-NEXT: 
store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 2, ptr [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 9, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP65]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP66]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP68:%.*]] = icmp ne i32 [[TMP67]], 0 -// CHECK1-NEXT: br i1 [[TMP68]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// 
CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 
x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP59]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 9, ptr [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP69]], align 4 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP70]], align 4 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, 
ptr [[TMP71]], align 4 +// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP73:%.*]] = icmp ne i32 [[TMP72]], 0 +// CHECK1-NEXT: br i1 [[TMP73]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -990,6 +1020,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1015,27 +1046,33 @@ // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float // CHECK1-NEXT: store float [[CONV5]], ptr [[SB_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[GC_ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[GD_ADDR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SD_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[GC_ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 
// CHECK1-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: land.lhs.true8:
-// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SC_ADDR]], align 4
-// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP6]] to double
+// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[SC_ADDR]], align 4
+// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP9]] to double
// CHECK1-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00
// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]]
// CHECK1: omp_if.then:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]])
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK1: omp_if.else:
// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) #[[ATTR2]]
+// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_IF_END]]
// CHECK1: omp_if.end:
@@ -1043,34 +1080,34 @@
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2
-// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -1269,6 +1306,7 @@ // CHECK3-NEXT: [[GB1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1303,27 +1341,33 @@ // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float // CHECK3-NEXT: store float [[CONV8]], ptr [[SB_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[GC2]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[GD3]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SD_ADDR]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[GC2]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK3-NEXT: br i1 [[CMP10]], label 
[[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true11: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined., ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1331,34 +1375,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // @@ -1369,46 +1413,52 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK3-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK3-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK3-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK3-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK3-NEXT: ret i32 [[ADD13]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_CASTED:%.*]] = 
alloca i32, align 4 @@ -1421,138 +1471,140 @@ // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK3-NEXT: store float [[TMP6]], ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK3-NEXT: store i16 [[TMP8]], ptr [[C_CASTED]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK3-NEXT: store float [[TMP10]], ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK3-NEXT: store i16 [[TMP12]], ptr [[D_CASTED]], align 2 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[D_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK3-NEXT: store float [[TMP14]], ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, ptr @Ga, align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK3-NEXT: store float [[TMP11]], ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP6]], align 2 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[C_CASTED]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[C_CASTED]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK3-NEXT: store float [[TMP15]], ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK3-NEXT: store i16 [[TMP17]], ptr [[D_CASTED]], align 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[D_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK3-NEXT: store float [[TMP19]], ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr @Ga, align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP21]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK3-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true2: -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP18]] to double +// CHECK3-NEXT: [[TMP23:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP23]] to double // CHECK3-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr @Gc, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr @Gc, ptr 
[[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr null, ptr [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP44]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 -// CHECK3-NEXT: store ptr null, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 2, ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 9, ptr [[TMP49]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// 
CHECK3-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP56]], align 8 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP57]], align 8 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP60]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 -// CHECK3-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: 
store ptr null, ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP49]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: store ptr null, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP53]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store 
i32 9, ptr [[TMP54]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP51]], ptr [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP61]], align 8 +// CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP62]], align 8 +// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP63]], align 4 +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP64]], align 4 +// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP65]], align 4 +// CHECK3-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 +// CHECK3-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ 
-1573,6 +1625,7 @@ // CHECK3-NEXT: [[GB1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1607,27 +1660,33 @@ // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float // CHECK3-NEXT: store float [[CONV8]], ptr [[SB_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[GC2]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[GD3]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SD_ADDR]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[GC2]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK3-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true11: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..2, ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1635,34 +1694,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // @@ -1692,46 +1751,52 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK3-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK3-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK3-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK3-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK3-NEXT: ret i32 [[ADD13]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, 
align 4 // CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 @@ -1744,138 +1809,140 @@ // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK3-NEXT: store float [[TMP6]], ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK3-NEXT: store i16 [[TMP8]], ptr [[C_CASTED]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK3-NEXT: store float [[TMP10]], ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK3-NEXT: store i16 [[TMP12]], ptr [[D_CASTED]], align 2 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[D_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK3-NEXT: store float [[TMP14]], ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, ptr @Ga, align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK3-NEXT: store float [[TMP11]], ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = 
load i16, ptr [[TMP6]], align 2 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[C_CASTED]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[C_CASTED]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK3-NEXT: store float [[TMP15]], ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK3-NEXT: store i16 [[TMP17]], ptr [[D_CASTED]], align 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[D_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK3-NEXT: store float [[TMP19]], ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr @Ga, align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP21]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK3-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true2: -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP18]] to double +// CHECK3-NEXT: [[TMP23:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP23]] to double // CHECK3-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// 
CHECK3-NEXT: store ptr @Gc, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr @Gc, ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr null, ptr [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP44]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 -// CHECK3-NEXT: store ptr null, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 2, ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 9, ptr [[TMP49]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 
2 -// CHECK3-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP56]], align 8 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP57]], align 8 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP60]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 -// CHECK3-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP49]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: store ptr null, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP53]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 9, ptr [[TMP54]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP51]], ptr [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP61]], align 8 +// CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP62]], align 8 +// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP63]], align 4 +// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP64]], align 4 +// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP65]], align 4 +// CHECK3-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 +// CHECK3-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: 
call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1896,6 +1963,7 @@ // CHECK3-NEXT: [[GB1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1930,27 +1998,33 @@ // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float // CHECK3-NEXT: store float [[CONV8]], ptr [[SB_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[GC2]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[GD3]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SD_ADDR]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[GC2]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK3-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true11: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1958,34 +2032,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_data_map_codegen_hold.cpp b/clang/test/OpenMP/target_data_map_codegen_hold.cpp --- a/clang/test/OpenMP/target_data_map_codegen_hold.cpp +++ b/clang/test/OpenMP/target_data_map_codegen_hold.cpp @@ -522,3 +522,13 @@ // CHECK-I386: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } // CHECK-I386: attributes #1 = { nounwind } // CHECK-I386: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +//. +// CHECK-PPC64LE: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK-PPC64LE: !1 = !{i32 7, !"openmp", i32 50} +// CHECK-PPC64LE: !2 = !{!"clang version 17.0.0"} +//. +// CHECK-I386: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +// CHECK-I386: !1 = !{i32 1, !"wchar_size", i32 4} +// CHECK-I386: !2 = !{i32 7, !"openmp", i32 50} +// CHECK-I386: !3 = !{!"clang version 17.0.0"} +//. diff --git a/clang/test/OpenMP/target_in_reduction_codegen.cpp b/clang/test/OpenMP/target_in_reduction_codegen.cpp --- a/clang/test/OpenMP/target_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_in_reduction_codegen.cpp @@ -49,6 +49,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: %omp.outlined.arg.agg. = alloca %struct.anon // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -168,7 +169,15 @@ // CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_3]] to i8* // CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 2, i8* [[TMP57]]) // CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* [[A]], i64 [[TMP2]], i16* [[VLA]], i8** [[DOTTASK_RED_]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 0 +// CHECK1-NEXT: store i32* %a, i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 1 +// CHECK1-NEXT: store i64 %2, i64* [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 2 +// CHECK1-NEXT: store i16* %vla, i16** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 3 +// CHECK1-NEXT: store i8** %.task_red., i8*** [[TMP62]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* %omp.outlined.arg.agg.) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -462,52 +471,51 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], i16* nonnull align 2 dereferenceable(2) [[D:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[CONTEXT_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[CONTEXT_ADDR_:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 -// CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 1, i64 48, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP16]] to %struct.anon* -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i8* [[TMP12]]) -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP11]], %struct.kmp_task_t_with_privates* [[TMP13]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i8* [[TMP12]]) +// CHECK1-NEXT: store %struct.anon* [[CONTEXT_]], %struct.anon** [[CONTEXT_ADDR_]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[CONTEXT_ADDR_]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 1 +// CHECK1-NEXT: store i64 
[[TMP4]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 2 +// CHECK1-NEXT: store i16* [[TMP6]], i16** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 1, i64 48, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP21]] to %struct.anon.1* +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP23]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** %8, align 8 +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 %16, i8* [[TMP17]]) +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @.omp_task_entry.(i32 %16, %struct.kmp_task_t_with_privates* [[TMP18]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 %16, i8* [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -573,7 +581,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 @@ -585,7 +593,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // 
CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -598,25 +606,25 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon.1* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast void (i8*, ...)* [[TMP15]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP17]](i8* [[TMP16]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon.1* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP20]] to i8* // CHECK1-NEXT: [[TMP24:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP22]], i8* [[TMP21]], i8* [[TMP23]]) // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP24]] to i32* -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon.1* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon.1* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP28:%.*]] = load i16*, i16** [[TMP27]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP18]], align 8 // CHECK1-NEXT: call void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}(i32* [[TMP26]], i64 [[TMP14]], i16* [[TMP28]], i8* [[TMP29]]) #[[ATTR3]] diff --git a/clang/test/OpenMP/target_map_codegen_03.cpp b/clang/test/OpenMP/target_map_codegen_03.cpp --- a/clang/test/OpenMP/target_map_codegen_03.cpp +++ b/clang/test/OpenMP/target_map_codegen_03.cpp @@ -62,19 +62,22 @@ // CHECK1-NEXT: entry: // 
CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 @@ -82,50 +85,52 @@ // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[I_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 2, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK1-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK1-NEXT: store i32 0, ptr [[TMP20]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[I_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[I_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP20]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i64 [[TMP2]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i64 [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret void @@ -135,24 +140,29 @@ // CHECK1-SAME: (i64 noundef [[I:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -168,19 +178,22 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 @@ -188,50 +201,52 @@ // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 2, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr 
null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK3-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK3-NEXT: store i32 0, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[I_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 2, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-NEXT: store i32 0, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i32 [[TMP2]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i32 [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret void @@ -241,24 +256,29 @@ // CHECK3-SAME: (i32 noundef [[I:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_map_codegen_hold.cpp b/clang/test/OpenMP/target_map_codegen_hold.cpp --- a/clang/test/OpenMP/target_map_codegen_hold.cpp +++ b/clang/test/OpenMP/target_map_codegen_hold.cpp @@ -133,33 +133,81 @@ #endif //. 
+// CHECK-USE-PPC64LE: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id = weak constant i8 0 // CHECK-USE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4] // CHECK-USE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8224, i64 281474976718851, i64 281474976718851, i64 8227, i64 8224, i64 1407374883561475, i64 1407374883561475] +// CHECK-USE-PPC64LE: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-USE-PPC64LE: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-USE-PPC64LE: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id = weak constant i8 0 // CHECK-USE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4] // CHECK-USE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9255] +// CHECK-USE-PPC64LE: @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id = weak constant i8 0 // CHECK-USE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4] // CHECK-USE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8224, i64 281474976718851, i64 281474976718851] +// CHECK-USE-PPC64LE: @.omp_offloading.entry_name = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100\00" +// CHECK-USE-PPC64LE: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-PPC64LE: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114\00" +// CHECK-USE-PPC64LE: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id, ptr @.omp_offloading.entry_name.5, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-PPC64LE: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125\00" +// CHECK-USE-PPC64LE: @.omp_offloading.entry.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id, ptr @.omp_offloading.entry_name.6, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-PPC64LE: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @.omp_offloading.requires_reg, ptr null }] //. 
+// CHECK-USE-I386: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id = weak constant i8 0 // CHECK-USE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4] // CHECK-USE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8224, i64 281474976718851, i64 281474976718851, i64 8227, i64 8224, i64 1407374883561475, i64 1407374883561475] +// CHECK-USE-I386: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-USE-I386: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-USE-I386: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id = weak constant i8 0 // CHECK-USE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4] // CHECK-USE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9255] +// CHECK-USE-I386: @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id = weak constant i8 0 // CHECK-USE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4] // CHECK-USE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8224, i64 281474976718851, i64 281474976718851] +// CHECK-USE-I386: @.omp_offloading.entry_name = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100\00" +// CHECK-USE-I386: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id, ptr @.omp_offloading.entry_name, i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-I386: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114\00" +// CHECK-USE-I386: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id, ptr @.omp_offloading.entry_name.5, i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-I386: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125\00" +// CHECK-USE-I386: @.omp_offloading.entry.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id, ptr @.omp_offloading.entry_name.6, i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-I386: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @.omp_offloading.requires_reg, ptr null }] //. 
+// CHECK-NOUSE-PPC64LE: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id = weak constant i8 0 // CHECK-NOUSE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4] // CHECK-NOUSE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8192, i64 281474976718851, i64 281474976718851, i64 8195, i64 8192, i64 1407374883561475, i64 1407374883561475] +// CHECK-NOUSE-PPC64LE: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-NOUSE-PPC64LE: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-NOUSE-PPC64LE: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id = weak constant i8 0 // CHECK-NOUSE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4] // CHECK-NOUSE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9223] +// CHECK-NOUSE-PPC64LE: @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id = weak constant i8 0 // CHECK-NOUSE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4] // CHECK-NOUSE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8192, i64 281474976718851, i64 281474976718851] +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry_name = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100\00" +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114\00" +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id, ptr @.omp_offloading.entry_name.5, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125\00" +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id, ptr @.omp_offloading.entry_name.6, i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-PPC64LE: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @.omp_offloading.requires_reg, ptr null }] //. 
+// CHECK-NOUSE-I386: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id = weak constant i8 0
// CHECK-NOUSE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4]
// CHECK-NOUSE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8192, i64 281474976718851, i64 281474976718851, i64 8195, i64 8192, i64 1407374883561475, i64 1407374883561475]
+// CHECK-NOUSE-I386: @0 = private unnamed_addr constant [23 x i8] c"
+// CHECK-NOUSE-I386: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
+// CHECK-NOUSE-I386: @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id = weak constant i8 0
// CHECK-NOUSE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4]
// CHECK-NOUSE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9223]
+// CHECK-NOUSE-I386: @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id = weak constant i8 0
// CHECK-NOUSE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4]
// CHECK-NOUSE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8192, i64 281474976718851, i64 281474976718851]
+// CHECK-NOUSE-I386: @.omp_offloading.entry_name = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100\00"
+// CHECK-NOUSE-I386: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l100.region_id, ptr @.omp_offloading.entry_name, i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+// CHECK-NOUSE-I386: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [62 x i8] c"__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114\00"
+// CHECK-NOUSE-I386: @.omp_offloading.entry.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__Z20explicit_maps_singlei_l114.region_id, ptr @.omp_offloading.entry_name.5, i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+// CHECK-NOUSE-I386: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125\00"
+// CHECK-NOUSE-I386: @.omp_offloading.entry.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125 = weak constant %struct.__tgt_offload_entry { ptr @.__omp_offloading_10302_b980539__ZN2ST20test_present_membersEv_l125.region_id, ptr @.omp_offloading.entry_name.6, i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1
+// CHECK-NOUSE-I386: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @.omp_offloading.requires_reg, ptr null }]
//.
// CHECK-USE-PPC64LE-LABEL: define {{[^@]+}}@_Z20explicit_maps_singlei
// CHECK-USE-PPC64LE-SAME: (i32 noundef signext [[II:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -1301,3 +1349,59 @@
// CHECK-NOUSE-I386-NEXT: entry:
// CHECK-NOUSE-I386-NEXT: call void @__tgt_register_requires(i64 1)
// CHECK-NOUSE-I386-NEXT: ret void
+//
+//.
+// CHECK-USE-PPC64LE: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+// CHECK-USE-PPC64LE: attributes #1 = { noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+// CHECK-USE-PPC64LE: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+// CHECK-USE-PPC64LE: attributes #3 = { nounwind }
+// CHECK-USE-PPC64LE: attributes #4 = { noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+//.
+// CHECK-USE-I386: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" }
+// CHECK-USE-I386: attributes #1 = { noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" }
+// CHECK-USE-I386: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+// CHECK-USE-I386: attributes #3 = { nounwind }
+// CHECK-USE-I386: attributes #4 = { noinline nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" }
+//.
+// CHECK-NOUSE-PPC64LE: attributes #0 = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+// CHECK-NOUSE-PPC64LE: attributes #1 = { noinline norecurse nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+// CHECK-NOUSE-PPC64LE: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+// CHECK-NOUSE-PPC64LE: attributes #3 = { nounwind }
+// CHECK-NOUSE-PPC64LE: attributes #4 = { noinline nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" }
+//.
+// CHECK-NOUSE-I386: attributes #0 = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" }
+// CHECK-NOUSE-I386: attributes #1 = { noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" }
+// CHECK-NOUSE-I386: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+// CHECK-NOUSE-I386: attributes #3 = { nounwind }
+// CHECK-NOUSE-I386: attributes #4 = { noinline nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" }
+//.
+// CHECK-USE-PPC64LE: !0 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 100, i32 0, i32 0}
+// CHECK-USE-PPC64LE: !1 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 114, i32 0, i32 1}
+// CHECK-USE-PPC64LE: !2 = !{i32 0, i32 66306, i32 194512185, !"_ZN2ST20test_present_membersEv", i32 125, i32 0, i32 2}
+// CHECK-USE-PPC64LE: !3 = !{i32 1, !"wchar_size", i32 4}
+// CHECK-USE-PPC64LE: !4 = !{i32 7, !"openmp", i32 50}
+// CHECK-USE-PPC64LE: !5 = !{!"clang version 17.0.0"}
+//.
+// CHECK-USE-I386: !0 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 100, i32 0, i32 0}
+// CHECK-USE-I386: !1 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 114, i32 0, i32 1}
+// CHECK-USE-I386: !2 = !{i32 0, i32 66306, i32 194512185, !"_ZN2ST20test_present_membersEv", i32 125, i32 0, i32 2}
+// CHECK-USE-I386: !3 = !{i32 1, !"NumRegisterParameters", i32 0}
+// CHECK-USE-I386: !4 = !{i32 1, !"wchar_size", i32 4}
+// CHECK-USE-I386: !5 = !{i32 7, !"openmp", i32 50}
+// CHECK-USE-I386: !6 = !{!"clang version 17.0.0"}
+//.
+// CHECK-NOUSE-PPC64LE: !0 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 100, i32 0, i32 0}
+// CHECK-NOUSE-PPC64LE: !1 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 114, i32 0, i32 1}
+// CHECK-NOUSE-PPC64LE: !2 = !{i32 0, i32 66306, i32 194512185, !"_ZN2ST20test_present_membersEv", i32 125, i32 0, i32 2}
+// CHECK-NOUSE-PPC64LE: !3 = !{i32 1, !"wchar_size", i32 4}
+// CHECK-NOUSE-PPC64LE: !4 = !{i32 7, !"openmp", i32 50}
+// CHECK-NOUSE-PPC64LE: !5 = !{!"clang version 17.0.0"}
+//.
+// CHECK-NOUSE-I386: !0 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 100, i32 0, i32 0}
+// CHECK-NOUSE-I386: !1 = !{i32 0, i32 66306, i32 194512185, !"_Z20explicit_maps_singlei", i32 114, i32 0, i32 1}
+// CHECK-NOUSE-I386: !2 = !{i32 0, i32 66306, i32 194512185, !"_ZN2ST20test_present_membersEv", i32 125, i32 0, i32 2}
+// CHECK-NOUSE-I386: !3 = !{i32 1, !"NumRegisterParameters", i32 0}
+// CHECK-NOUSE-I386: !4 = !{i32 1, !"wchar_size", i32 4}
+// CHECK-NOUSE-I386: !5 = !{i32 7, !"openmp", i32 50}
+// CHECK-NOUSE-I386: !6 = !{!"clang version 17.0.0"}
+//.
diff --git a/clang/test/OpenMP/target_map_member_expr_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_codegen.cpp --- a/clang/test/OpenMP/target_map_member_expr_codegen.cpp +++ b/clang/test/OpenMP/target_map_member_expr_codegen.cpp @@ -473,53 +473,59 @@ // CHECK-NEXT: [[CSIZE_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[CSIZE_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-NEXT: store i64 [[CSIZE]], ptr [[CSIZE_ADDR]], align 8 // CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CSIZE_ADDR]], align 4 -// CHECK-NEXT: store i32 [[TMP1]], ptr [[CSIZE_CASTED]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[CSIZE_CASTED]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., i64 [[TMP2]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSIZE_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[CSIZE:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[D:%.*]]) #[[ATTR2]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[CSIZE_ADDR:%.*]] = alloca i64, align 8 -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[CSIZE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store i64 [[CSIZE]], ptr [[CSIZE_ADDR]], align 8 -// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[CSIZE]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK-NEXT: store i32 0, ptr [[I]], align 4 // CHECK-NEXT: br label [[FOR_COND:%.*]] // CHECK: for.cond: -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[CSIZE_ADDR]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], [[TMP2]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[CSIZE]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK: for.body: -// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR:%.*]], ptr [[TMP3]], i32 0, i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR:%.*]], ptr [[TMP7]], i32 0, i32 1 +// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM]] // CHECK-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: br label [[FOR_INC:%.*]] // CHECK: for.inc: -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4 // CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK: for.end: diff --git a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp --- a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp +++ b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp @@ -357,7 +357,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS10:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS11:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -454,7 +454,7 @@ // CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 // CHECK1-NEXT: store i32 [[TMP51]], ptr [[TMP50]], align 4 // CHECK1-NEXT: [[TMP52:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 64, i64 4, ptr @.omp_task_entry., i64 -1) @@ -603,34 +603,40 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -652,26 +658,27 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -681,80 +688,94 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] // CHECK1-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: 
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..4, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -767,13 +788,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -786,81 +808,89 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, 
!llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -876,18 +906,22 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -999,17 +1033,21 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..11) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -1019,8 +1057,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1028,32 +1065,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr 
[[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..12, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1246,7 +1290,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS9:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS10:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS11:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca 
[[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -1343,7 +1387,7 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP47]], align 4 // CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 // CHECK3-NEXT: store i32 [[TMP51]], ptr [[TMP50]], align 4 // CHECK3-NEXT: [[TMP52:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 36, i32 4, ptr @.omp_task_entry., i64 -1) @@ -1492,34 +1536,40 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1541,26 +1591,27 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1570,78 +1621,92 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] // CHECK3-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: 
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..4, i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1654,13 +1719,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1673,79 +1739,87 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1761,18 +1835,22 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1884,17 +1962,21 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..11) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1904,8 +1986,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1913,32 +1994,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr 
[[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..12, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1955,26 +2043,27 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1984,80 +2073,94 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// 
CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP10]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2070,13 +2173,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2089,81 +2193,89 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // 
CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, 
!llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2179,18 +2291,22 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2200,34 +2316,40 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -2246,17 +2368,21 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR0]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2266,8 +2392,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -2275,32 +2400,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -2310,26 +2442,27 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2339,78 +2472,92 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] // CHECK11-NEXT: br 
i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2423,13 +2570,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2442,79 +2590,87 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 
[[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], 
ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2530,18 +2686,22 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -2551,34 +2711,40 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -2597,17 +2763,21 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR0]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -2617,8 +2787,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -2626,31 +2795,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -303,7 +303,7 @@ // CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 @@ -569,17 +569,21 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) 
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -653,27 +657,32 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -681,36 +690,41 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK1: .cancel.continue: // CHECK1-NEXT: ret void @@ -721,39 +735,45 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr 
[[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: 
store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -769,7 +789,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -787,82 +807,95 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK1-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// 
CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK1-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK1-NEXT: ret void // // @@ -1217,7 +1250,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1227,48 +1260,57 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP6]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK1-NEXT: ret void // // @@ -1279,59 +1321,67 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1342,47 +1392,54 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], 
align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1406,7 +1463,7 @@ // CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 @@ -1672,17 +1729,21 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1756,27 +1817,32 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1784,36 +1850,41 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK3: .cancel.continue: // CHECK3-NEXT: ret void @@ -1824,39 +1895,45 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr 
[[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: 
store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -1872,7 +1949,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1890,82 +1967,95 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK3-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK3-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK3-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK3-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// 
CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK3-NEXT: store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK3-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK3-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK3-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK3-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -2320,7 +2410,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2330,48 +2420,57 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// 
CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP6]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK3-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK3-NEXT: ret void // // @@ -2382,59 +2481,67 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -2445,47 +2552,54 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], 
align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -2500,17 +2614,21 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2518,36 +2636,41 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK9-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK9-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: -// CHECK9-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK9-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK9: .cancel.continue: // CHECK9-NEXT: ret void @@ -2558,39 +2681,45 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr 
[[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: 
store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -2606,7 +2735,7 @@ // CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2624,82 +2753,95 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK9-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK9-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK9-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK9-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// 
CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK9-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK9-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK9-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK9-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK9-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK9-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK9-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK9-NEXT: ret void // // @@ -2710,59 +2852,67 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], 
align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -2775,7 +2925,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2785,48 +2935,57 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP6]] -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK9-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK9-NEXT: ret void // // @@ -2836,47 +2995,54 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], 
align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -2884,17 +3050,21 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -2902,36 +3072,41 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK11-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK11-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: -// CHECK11-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK11-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK11: .cancel.continue: // CHECK11-NEXT: ret void @@ -2942,39 +3117,45 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], 
align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 
[[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -2990,7 +3171,7 @@ // CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3008,82 +3189,95 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK11-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK11-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK11-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK11-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: 
[[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK11-NEXT: store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK11-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK11-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK11-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK11-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK11-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK11-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK11-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK11-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK11-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK11-NEXT: ret void // // @@ -3094,59 +3288,67 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], 
align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // @@ -3159,7 +3361,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3169,48 +3371,57 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// 
CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP6]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK11-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK11-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK11-NEXT: ret void // // @@ -3220,47 +3431,54 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr 
[[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -74,8 +74,8 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -100,184 +100,138 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG48:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr 
[[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG49:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG48:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG48]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG48]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP14]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP15:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP15]]), !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP16]]), !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP15]], i64 32), !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2), !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG52:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG47]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG52:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG53:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B3:%.*]] = alloca [10 x [10 x i32]], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x i32]], align 4 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // 
CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG70:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG70]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG70]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B3]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B3]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG70]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG76:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG76]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG76]] -// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[DBG75]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, 
!dbg [[DBG78]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 1, !dbg [[DBG81:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG81]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[DBG80]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG83]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG84:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG85:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG85]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG85]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG87:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG88]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG88]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG88]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG91]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG68:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP6]], i64 400, i1 false), !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG74]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG74]] +// CHECK1-NEXT: store ptr [[ARRAYIDX2]], ptr [[F]], align 8, !dbg [[DBG73]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG76]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 1, !dbg [[DBG79:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG79]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[H]], align 8, !dbg [[DBG78]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG81]] +// CHECK1-NEXT: store i32 5, ptr [[A]], align 4, !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG83:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG84:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG83]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG83]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG85:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG86]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG86]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG86]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG89]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG89]] 
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG89]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4, !dbg [[DBG89]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG91]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG93]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG93]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG96]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG98]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG98]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG98]] -// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG98]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG98]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG98]] -// CHECK1-NEXT: ret void, !dbg [[DBG99:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG100:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[DOTBOUND_TID__ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG114:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG114]] -// CHECK1-NEXT: call void @__omp_outlined___debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG114]] -// CHECK1-NEXT: ret void, !dbg [[DBG114]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX17]], align 4, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG94]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG94]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG94]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP16]] to i1, !dbg [[DBG96]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG96]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP15]], !dbg [[DBG96]] +// CHECK1-NEXT: [[TOBOOL21:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG96]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL21]] to i8, !dbg [[DBG96]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG96]] +// CHECK1-NEXT: ret void, !dbg [[DBG97:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 
dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG115:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7:[0-9]+]] !dbg [[DBG98:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG123:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG123]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP8]]) #[[ATTR3]], !dbg [[DBG123]] -// CHECK1-NEXT: ret void, !dbg [[DBG123]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG110:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg 
[[DBG110]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG110]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP8]]) #[[ATTR4:[0-9]+]], !dbg [[DBG110]] +// CHECK1-NEXT: ret void, !dbg [[DBG110]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG124:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG111:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -286,203 +240,155 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG137:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG137]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG137]] -// 
CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG137]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG123:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG124:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG124]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG124]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG124]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false), !dbg [[DBG124]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG124]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG124]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]] -// 
CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG139:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG125:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG126:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG125]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP14]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG125]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]), !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP18]], ptr [[TMP17]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP17]]), !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i64 32), !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2), !dbg [[DBG128:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG129:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG137]] +// CHECK1-NEXT: ret void, !dbg [[DBG124]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG142:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG130:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG156:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG156]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG156]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG156]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG160:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG160]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG160]] -// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG159]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META161:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG162]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG164]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG167:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG167]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG168:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG169:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG170:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG169]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG169]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG171:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG172:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG173:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG172]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG172]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG174:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG175:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG175]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG176:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG175]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG175]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG175]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG177:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG177]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG177]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG179:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG180:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG180]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG180]] -// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG181:![0-9]+]] 
-// CHECK1-NEXT: ret void, !dbg [[DBG182:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG183:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META189:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META190:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG191]] -// CHECK1-NEXT: call void @__omp_outlined___debug__1(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG191]] -// CHECK1-NEXT: ret void, !dbg [[DBG191]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata 
[[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG145:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG145]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG145]] +// CHECK1-NEXT: store ptr [[ARRAYIDX2]], ptr [[F]], align 8, !dbg [[DBG144]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG150:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG150]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[H]], align 8, !dbg [[DBG149]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG152:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG152]] +// CHECK1-NEXT: store i32 5, ptr [[A]], align 4, !dbg [[DBG153:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG154:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG155:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG154]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG154]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG156:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG157:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds 
[10 x [10 x i32]], ptr [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG157]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG157]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG159:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG160:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG160]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG161:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG160]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG160]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4, !dbg [[DBG160]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG162:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG163:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG162]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG162]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX17]], align 4, !dbg [[DBG164:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG165:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG165]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG165]] +// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG166:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG167:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG192:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG168:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META193:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] // 
CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG198]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG198]] -// CHECK1-NEXT: ret void, !dbg [[DBG198]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG174:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG174]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR4]], !dbg [[DBG174]] +// CHECK1-NEXT: ret void, !dbg [[DBG174]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG199:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG175:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -492,209 +398,156 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG181:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META182:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG212:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG188:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false), !dbg [[DBG188]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG188]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG188]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG189:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG189]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG189]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG189]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]), !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP20]], ptr [[TMP19]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP19]]), !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 32), !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG190:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG192:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG212]] +// CHECK1-NEXT: ret void, !dbg [[DBG188]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG217:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG193:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG221:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META222:![0-9]+]], metadata !DIExpression()), !dbg [[DBG221]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], 
ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META223:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META225:![0-9]+]], metadata !DIExpression()), !dbg [[DBG226:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG228:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG230:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG234:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG235:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG235]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG235]] -// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[DBG234]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG240:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG240]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg 
[[DBG239]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META241:![0-9]+]], metadata !DIExpression()), !dbg [[DBG242:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG242]] -// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG243:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG244:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG245:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG244]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG244]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG246:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG247:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG247]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG248:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG247]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG247]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG249:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG250]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG251:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG250]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG250]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG250]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG252:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG253:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG252]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG252]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG254:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG256:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP17]] to i64, !dbg [[DBG255]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP18]], 0, !dbg [[DBG255]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG257:![0-9]+]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 1, !dbg [[DBG257]] -// CHECK1-NEXT: ret void, !dbg [[DBG258:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG259:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META264:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG269:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: call void @__omp_outlined___debug__3(ptr [[TMP4]], ptr [[TMP5]], ptr 
addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG269]] -// CHECK1-NEXT: ret void, !dbg [[DBG269]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG204:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META205:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG208:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG208]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG208]] +// CHECK1-NEXT: store ptr [[ARRAYIDX2]], ptr [[F]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[G]], align 8, !dbg [[DBG210]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[H]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG215:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG215]] +// CHECK1-NEXT: store i32 5, ptr [[TMP4]], align 4, !dbg [[DBG216:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG217:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG217]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG217]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX6]], align 4, !dbg 
[[DBG219:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG220:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG220]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG221:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG220]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG220]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG222:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG223:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG223]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG224:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG223]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG223]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4, !dbg [[DBG223]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG225:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG226:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG225]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG225]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX17]], align 4, !dbg [[DBG227:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG228:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG228]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG228]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG228]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG230:![0-9]+]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG230]] +// CHECK1-NEXT: ret void, !dbg [[DBG231:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG270:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG232:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236:![0-9]+]] // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG278]] -// CHECK1-NEXT: ret void, !dbg [[DBG278]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG240]] +// CHECK1-NEXT: 
[[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG240]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG240]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG240]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR4]], !dbg [[DBG240]]
+// CHECK1-NEXT: ret void, !dbg [[DBG240]]
//
diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp
--- a/clang/test/OpenMP/target_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp
@@ -335,7 +335,7 @@
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
+// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
// CHECK1-NEXT: [[A_CASTED3:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8
@@ -437,13 +437,13 @@
// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0
// CHECK1-NEXT: [[TMP44:%.*]] = load i16, ptr [[AA]], align 2
// CHECK1-NEXT: store i16 [[TMP44]], ptr [[TMP43]], align 4
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1
// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[LIN]], align 4
// CHECK1-NEXT: store i32 [[TMP46]], ptr [[TMP45]], align 4
-// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2
// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[A]], align 4
// CHECK1-NEXT: store i32 [[TMP48]], ptr [[TMP47]], align 4
// CHECK1-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1)
@@ -654,15 +654,17 @@
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103
// CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.)
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -672,46 +674,48 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle 
i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue3: @@ -719,19 +723,19 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // @@ -740,27 +744,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: 
store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -772,52 +776,58 @@ // CHECK1-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// 
CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// 
CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -825,12 +835,12 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK1-NEXT: store i64 [[TMP14]], ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[K]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -842,33 +852,32 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -883,88 +892,96 @@ // CHECK1-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], 
align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, 
[[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK1-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK1-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK1-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: 
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -1045,8 +1062,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !24 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !24 @@ -1096,27 +1113,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1126,60 +1143,66 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1196,8 +1219,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1216,31 +1238,40 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1250,122 +1281,128 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK1-NEXT: 
[[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8 +// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], ptr [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr 
[[Y]], align 8 +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK1-NEXT: ret void // // @@ -1720,7 +1757,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1730,23 +1767,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1756,72 +1798,76 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr 
[[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A2]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1832,45 +1878,53 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr 
[[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -1880,30 +1934,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1913,65 +1968,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i64 
0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -2010,7 +2071,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4 @@ -2107,13 +2168,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP36]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP40:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP40]], ptr [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[LIN]], align 4 // CHECK3-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[A]], align 4 // CHECK3-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = call ptr 
@__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2326,15 +2387,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2344,46 +2407,48 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK3: .cancel.exit2: // CHECK3-NEXT: br label [[CANCEL_EXIT]] // CHECK3: .cancel.continue3: @@ -2391,19 +2456,19 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -2412,24 +2477,26 @@ // 
CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -2438,69 +2505,73 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: 
[[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK3-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[K]], align 8 +// CHECK3-NEXT: store i64 [[TMP19]], ptr [[TMP4]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2512,33 +2583,32 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 
[[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2553,88 +2623,96 @@ // CHECK3-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// 
CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK3-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// 
CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK3-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK3-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK3-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK3-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: 
omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2715,8 +2793,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !25 @@ -2766,27 +2844,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2796,60 +2874,66 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK3-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: 
omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2866,8 +2950,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2886,31 +2969,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2920,122 +3012,128 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, 
ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: 
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK3-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: 
[[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add 
nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], ptr [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK3-NEXT: ret void // // @@ -3390,7 +3488,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3400,23 +3498,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3426,72 +3529,76 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr 
[[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4 -// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A2]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK3-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -3502,45 +3609,53 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr 
[[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -3550,30 +3665,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3583,65 +3699,71 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i64 
0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK3-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK3-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -3655,15 +3777,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3673,46 +3797,48 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle 
i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK9: .cancel.continue: -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK9-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK9: .cancel.exit2: // CHECK9-NEXT: br label [[CANCEL_EXIT]] // CHECK9: .cancel.continue3: @@ -3720,19 +3846,19 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -3742,33 +3868,32 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], 
align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -3783,88 +3908,96 @@ // CHECK9-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], 
ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK9-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 
8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK9-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK9-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK9-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK9-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK9-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK9-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK9-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK9-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK9-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK9-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK9-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK9-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: // CHECK9-NEXT: ret void @@ -3881,27 +4014,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3911,60 +4044,66 @@ // CHECK9-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// 
CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK9-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -3981,8 +4120,7 @@ // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4001,31 +4139,40 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4035,122 +4182,128 @@ // CHECK9-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: 
[[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK9-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK9-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK9-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK9-NEXT: 
[[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK9-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK9-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK9-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK9-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK9-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK9-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK9-NEXT: store i64 [[ADD20]], ptr [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK9-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK9-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8 +// CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK9-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK9-NEXT: store i64 [[ADD16]], ptr [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP39:%.*]] = load i8, ptr 
[[Y]], align 8 +// CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK9-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK9-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK9-NEXT: ret void // // @@ -4161,45 +4314,53 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: 
store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 
[[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: ret void // // @@ -4211,7 +4372,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4221,23 +4382,28 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4247,72 +4413,76 @@ // CHECK9-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: 
store double [[INC]], ptr [[A4]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK9-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A2]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK9-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK9-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK9-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4322,30 +4492,31 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4355,80 +4526,88 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK9-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4438,46 +4617,48 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK11-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK11-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: // CHECK11-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK11: .cancel.continue: -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK11-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK11: .cancel.exit2: // CHECK11-NEXT: br label [[CANCEL_EXIT]] // CHECK11: .cancel.continue3: @@ -4485,19 +4666,19 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK11: cancel.cont: // CHECK11-NEXT: ret void // CHECK11: cancel.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[CANCEL_CONT]] // // @@ -4507,33 +4688,32 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -4548,88 +4728,96 @@ // CHECK11-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK11-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load 
i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK11-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK11-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK11-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK11-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK11-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK11-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK11-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK11-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK11-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK11-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK11-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK11-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK11-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK11-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK11-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK11-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK11-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK11-NEXT: store i64 
[[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: // CHECK11-NEXT: ret void @@ -4646,27 +4834,27 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4676,60 +4864,66 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: 
br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK11-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK11-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4746,8 +4940,7 @@ // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca 
i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4766,31 +4959,40 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4800,122 +5002,128 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK11-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK11-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK11-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK11-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// 
CHECK11-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK11-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK11-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK11-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK11-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK11-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK11-NEXT: store i64 [[ADD20]], ptr [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK11-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK11-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK11-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK11-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// 
CHECK11-NEXT: store i64 [[ADD16]], ptr [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK11-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK11-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK11-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK11-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK11-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK11-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK11-NEXT: ret void // // @@ -4926,45 +5134,53 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load 
i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: ret void // // @@ -4976,7 +5192,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4986,23 +5202,28 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -5012,72 +5233,76 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK11-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK11-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 
4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A4]], align 4 -// CHECK11-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK11-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], ptr [[A2]], align 4 +// CHECK11-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK11-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK11-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -5087,30 +5312,31 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -5120,65 +5346,71 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store 
i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK11-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK11-NEXT: ret void // // @@ -5211,7 +5443,7 @@ // CHECK17-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK17-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK17-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK17-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK17-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK17-NEXT: [[A_CASTED3:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8 @@ -5313,13 +5545,13 @@ // CHECK17-NEXT: store ptr null, ptr [[TMP40]], align 8 // CHECK17-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK17-NEXT: [[TMP44:%.*]] = load i16, ptr [[AA]], align 2 // CHECK17-NEXT: store i16 [[TMP44]], ptr [[TMP43]], align 4 -// CHECK17-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK17-NEXT: [[TMP46:%.*]] = load i32, ptr [[LIN]], align 4 // CHECK17-NEXT: store i32 [[TMP46]], ptr [[TMP45]], align 4 -// CHECK17-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK17-NEXT: [[TMP48:%.*]] = load i32, ptr [[A]], align 4 // CHECK17-NEXT: store i32 [[TMP48]], ptr [[TMP47]], align 4 // CHECK17-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -5530,15 +5762,17 @@ // 
CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK17-SAME: () #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5548,46 +5782,48 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK17-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK17-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK17-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK17: .cancel.exit: // CHECK17-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK17: .cancel.continue: -// CHECK17-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK17-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK17: .cancel.exit2: // CHECK17-NEXT: br label [[CANCEL_EXIT]] // CHECK17: .cancel.continue3: @@ -5595,19 +5831,19 @@ // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK17: cancel.cont: // CHECK17-NEXT: ret void // CHECK17: cancel.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: br label [[CANCEL_CONT]] // // @@ -5616,27 +5852,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: 
[[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK17-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -5648,52 +5884,58 @@ // CHECK17-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK17-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -5701,12 +5943,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK17-NEXT: store i64 [[TMP14]], ptr [[K_ADDR]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[K]], align 8 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -5718,33 +5960,32 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = 
load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -5759,88 +6000,96 @@ // CHECK17-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK17-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = 
load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK17-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK17-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK17-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK17-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK17-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK17-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK17-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK17-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -5921,8 +6170,8 @@ // CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 -// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK17-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !24 // CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !24 @@ -5972,27 +6221,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6002,60 +6251,66 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK17-NEXT: 
store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // @@ -6071,61 +6326,8 @@ // CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) -// CHECK17-NEXT: ret void -// -// -// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { -// CHECK17-NEXT: entry: -// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 -// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[IT:%.*]] = alloca i8, align 1 -// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -6144,104 +6346,171 @@ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK17-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK17-NEXT: ret void +// +// +// CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { +// CHECK17-NEXT: entry: +// CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[IT:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK17-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK17-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK17-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK17-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK17-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, 
i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK17-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK17-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK17-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD20]], ptr [[X]], align 8 -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK17-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK17-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK17-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8 +// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK17-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8 +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK17-NEXT: store i64 [[ADD16]], ptr [[X]], align 8 +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK17-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK17-NEXT: store i32 [[ADD20]], ptr 
[[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK17-NEXT: ret void // // @@ -6596,7 +6865,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -6606,23 +6875,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6632,72 +6906,76 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 
0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK17-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8 -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], ptr [[A4]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], ptr [[A2]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK17-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK17-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK17-NEXT: ret void // // @@ -6708,45 +6986,53 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: 
[[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK17-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: ret void // // @@ -6756,30 +7042,31 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 
4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6789,65 +7076,71 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 
2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 
+// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK17-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // // @@ -6886,7 +7179,7 @@ // CHECK19-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK19-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK19-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK19-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK19-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: [[A_CASTED3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4 @@ -6983,13 +7276,13 @@ // CHECK19-NEXT: store ptr null, ptr [[TMP36]], align 4 // CHECK19-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK19-NEXT: [[TMP40:%.*]] = load i16, ptr [[AA]], align 2 // CHECK19-NEXT: store i16 [[TMP40]], ptr [[TMP39]], align 4 -// CHECK19-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[LIN]], align 4 // CHECK19-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4 -// CHECK19-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, 
i32 2 // CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[A]], align 4 // CHECK19-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4 // CHECK19-NEXT: [[TMP45:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -7202,15 +7495,17 @@ // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK19-SAME: () #[[ATTR2:[0-9]+]] { // CHECK19-NEXT: entry: -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7220,46 +7515,48 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK19-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK19-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK19-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK19: .cancel.exit: // CHECK19-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK19: .cancel.continue: -// CHECK19-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK19-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK19: .cancel.exit2: // CHECK19-NEXT: br label [[CANCEL_EXIT]] // CHECK19: .cancel.continue3: @@ -7267,19 +7564,19 @@ // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK19: cancel.cont: // 
CHECK19-NEXT: ret void // CHECK19: cancel.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: br label [[CANCEL_CONT]] // // @@ -7288,24 +7585,26 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -7314,69 +7613,73 @@ // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK19-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK19-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK19-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP12]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK19-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK19-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK19-NEXT: store i64 [[TMP15]], ptr [[TMP0]], align 8 +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, ptr [[K]], align 8 +// CHECK19-NEXT: store i64 [[TMP19]], ptr [[TMP4]], align 8 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -7388,33 +7691,32 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: 
[[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -7429,88 +7731,96 @@ // CHECK19-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK19-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 
-// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK19-NEXT: [[ADD:%.*]] = add i64 
[[CONV]], [[MUL5]] // CHECK19-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK19-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK19-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK19-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK19-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK19-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK19-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK19-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK19-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK19-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK19-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -7591,8 +7901,8 @@ // CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 
// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 -// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK19-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !25 @@ -7642,27 +7952,27 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7672,60 +7982,66 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: 
br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK19-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // @@ -7742,8 +8058,7 @@ // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca 
i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -7762,31 +8077,40 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7796,122 +8120,128 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK19-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK19-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK19-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// 
CHECK19-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], ptr [[X]], align 4 -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4 +// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK19-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK19-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4 +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// 
CHECK19-NEXT: store i64 [[ADD16]], ptr [[X]], align 4 +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK19-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK19-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK19-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK19-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK19-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK19-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK19-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK19-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK19-NEXT: ret void // // @@ -8266,7 +8596,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -8276,23 +8606,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, 
...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -8302,72 +8637,76 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK19-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK19-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], ptr [[A4]], align 4 -// CHECK19-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], ptr [[A2]], align 4 +// CHECK19-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK19-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK19-NEXT: ret void // // @@ -8378,45 +8717,53 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: 
[[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK19-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: ret void // // @@ -8426,30 +8773,31 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 
4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -8459,65 +8807,71 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 
2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK19-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 
+// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK19-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK19-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -65,8 +65,8 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -94,232 +94,186 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG42:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG45:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG42:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP14]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP15:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP15]]), !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP18]] to i1, !dbg [[DBG45]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP19]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP16]]), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP15]], i64 32), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2), !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG47:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr 
noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG47:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG48:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x i32]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG65]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG65]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG65]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG74:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG63:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG66:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP6]], i64 400, i1 false), !dbg [[DBG63]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG63]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG63]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG68]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG66]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG66]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG66]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG66]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG68]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr 
[[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG65]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG66]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG72:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG72]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG75:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG75]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG80:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG80]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG80]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG85:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = 
getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG85]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG87]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG89]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG89]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG92]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG95]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG97]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]] -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG100]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG102]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg 
[[DBG102]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG102]] -// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG102]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG73:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG73]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG73]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG78:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG78]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG78]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG80]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 1, !dbg [[DBG83:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG83]] +// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG85]] +// CHECK1-NEXT: store i32 5, ptr [[A]], align 4, !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG87]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG87]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG90]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG91:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG90]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG90]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg 
[[DBG93:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG93]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG93]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG93]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG93]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG95]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG95]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG98:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG99:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG98]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG98]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG98]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG100:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1, !dbg [[DBG100]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG100]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP25]], !dbg [[DBG100]] +// CHECK1-NEXT: [[TOBOOL23:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG100]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL23]] to i8, !dbg [[DBG100]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG100]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG101:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP104:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1, !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG63]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD29]], ptr 
[[DOTOMP_LB]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG63]], !llvm.loop [[LOOP104:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG105:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG108:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[C_ADDR]], align 8, !dbg [[DBG122:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG122]] -// CHECK1-NEXT: call void @__omp_outlined___debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG122]] -// CHECK1-NEXT: ret void, !dbg [[DBG122]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG105:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR7:[0-9]+]] !dbg [[DBG106:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -327,32 +281,32 @@ // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG132]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG132]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr addrspace(1) [[TMP8]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG132]] -// CHECK1-NEXT: ret void, !dbg [[DBG132]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG119:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG119]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG119]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr addrspace(1) [[TMP8]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3:[0-9]+]], !dbg [[DBG119]] +// CHECK1-NEXT: ret void, !dbg [[DBG119]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG120:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -361,66 +315,63 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG146]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG146]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG146]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG133:![0-9]+]] +// 
CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG133]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG133]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG133]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG133]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false), !dbg [[DBG133]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG133]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG133]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]])
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]]
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG148:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG134:![0-9]+]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8, !dbg [[DBG134]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG134]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG135:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG134]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG134]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP14]], align 8, !dbg [[DBG134]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG134]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8, !dbg [[DBG134]]
+// CHECK1-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG134]]
+// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]), !dbg [[DBG134]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG134]]
+// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP18]], ptr [[TMP17]], align 8, !dbg [[DBG134]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP17]]), !dbg [[DBG134]]
+// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i64 32), !dbg [[DBG134]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2), !dbg [[DBG137:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG138:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG146]]
+// CHECK1-NEXT: ret void, !dbg [[DBG133]]
//
//
-// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__1
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG151:![0-9]+]] {
+// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG139:![0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
@@ -431,206 +382,161 @@
// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]]
+// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]]
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata
[[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1), !dbg [[DBG173:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG150:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG153:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG150]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG150]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG168]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg 
[[DBG153]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG153]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG168]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG153]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG153]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG168]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG165]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG153]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG165]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG158]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG174:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG174]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG174]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 
x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG178:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG178]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG178]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[DBG177]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG180]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG183:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG183]] -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG186:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG187:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG188:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG187]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG187]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG189:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG190:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG190]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG190]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG190]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG192:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG193]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG193]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG193]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG195:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG196:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG195]] -// CHECK1-NEXT: 
[[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG195]] -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG197:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG198:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG198]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG198]] -// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG199:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG200:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG159:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG159]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG159]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META160:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG163:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG163]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG163]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG162]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG165:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG167:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG168:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG168]] +// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG167]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG170]] +// CHECK1-NEXT: store i32 5, ptr [[A]], align 4, !dbg [[DBG171:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG172:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG173:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG172]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG172]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG174:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG175:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG175]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG176:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG175]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 
[[IDXPROM11]], !dbg [[DBG175]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG177:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG178:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG178]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG179:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG178]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG178]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG178]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG180:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG181:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG180]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG180]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG182:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG183:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG183]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG183]] +// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG184:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG185:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG173]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP201:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG150]], !llvm.loop [[LOOP186:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG173]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop 
[[LOOP203:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG150]], !llvm.loop [[LOOP188:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG205:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__omp_outlined___debug__1(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG213]] -// CHECK1-NEXT: ret void, !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG189:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG190:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META193:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META221:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG222]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG222]] -// CHECK1-NEXT: ret void, !dbg [[DBG222]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG198]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG198]] +// CHECK1-NEXT: ret void, !dbg [[DBG198]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG199:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -640,66 +546,65 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call 
void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG236:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG236]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg 
[[DBG212]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false), !dbg [[DBG212]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG238:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP20]], ptr [[TMP19]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: call void 
@__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP19]]), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 32), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2), !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG236]] +// CHECK1-NEXT: ret void, !dbg [[DBG212]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG241:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG217:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -710,212 +615,160 @@ // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META224:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG252:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META259:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META226:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG231:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG228]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG228]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG258]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG231]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG231]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG258]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG231]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: br label [[COND_END]], !dbg 
[[DBG258]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG231]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG258]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG255]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG231]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG236:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG255]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG236]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG264:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG264]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG264]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG268:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG268]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG268]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG267]] -// 
CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG270]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG273:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG273]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG272]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG275]] -// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG276:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG277]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG277]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG279:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG280:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG280]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG281:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG280]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG280]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG282:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG283:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG283]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG284:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG283]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG283]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG283]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG285:![0-9]+]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG286:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG285]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG285]] -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG287:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG288:![0-9]+]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg 
[[DBG289:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG288]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG288]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG288]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG288]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG290:![0-9]+]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 1, !dbg [[DBG290]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG291:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG237:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG237]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG237]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG241:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG241]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG241]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[G]], align 8, !dbg [[DBG243]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG246:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG246]] +// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG245]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG248]] +// CHECK1-NEXT: store i32 5, ptr [[TMP4]], align 4, !dbg [[DBG249:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG251:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG250]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG250]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG252:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG253:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG253]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG254:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG253]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr 
inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG253]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG255:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG256:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG256]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG257:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG256]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG256]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG256]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG259:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG258]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG258]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG260:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG261:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG262:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG261]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG261]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG261]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0, !dbg [[DBG261]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG264:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP292:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG228]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG255]] -// CHECK1-NEXT: 
store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG228]], !llvm.loop [[LOOP267:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG296:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META305:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: call void @__omp_outlined___debug__3(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG306]] -// CHECK1-NEXT: ret void, !dbg [[DBG306]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG266:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG268:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG269:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META272:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273:![0-9]+]] // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// 
CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG315]] -// CHECK1-NEXT: ret void, !dbg [[DBG315]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG277:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG277]] +// CHECK1-NEXT: ret void, !dbg [[DBG277]] // diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -52,284 +52,290 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 
1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr 
@.red_init., ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP33]] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// 
CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP58]], 1 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP61]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP62]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP64]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP65]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP66]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[TMP67]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP68]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP65]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP69]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 -// CHECK1-NEXT: [[TMP74:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP73]], ptr [[TMP65]]) +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP67]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP68]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[TMP70]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP71]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP68]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP72]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP74]], ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 +// CHECK1-NEXT: [[TMP77:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP76]], ptr [[TMP68]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP75:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP75]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP77]]) -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP79]], i32 1) -// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP80]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP85]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP86]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP80]]) +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP82]], i32 1) +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP86:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP88]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP89]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP87]], [[TMP88]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP89]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP90]], [[TMP91]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ 
[[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP89]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP85]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP88]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP93:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP92]] monotonic, align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP94]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP95]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP95]] monotonic, align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] 
] +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP98]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP96:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP101:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP96]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP97]] to i32 -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP100:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP96]], i8 [[TMP99]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP101]] = extractvalue { i8, i1 } [[TMP100]], 0 -// CHECK1-NEXT: [[TMP102:%.*]] = extractvalue { i8, i1 } [[TMP100]], 1 -// CHECK1-NEXT: br i1 [[TMP102]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP99:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP104:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP99]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP99]], i8 [[TMP102]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP104]] = extractvalue { i8, i1 } [[TMP103]], 0 +// CHECK1-NEXT: [[TMP105:%.*]] = extractvalue { i8, i1 } [[TMP103]], 1 +// CHECK1-NEXT: br i1 [[TMP105]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP94]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: 
-// CHECK1-NEXT: [[TMP103:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP103]]) +// CHECK1-NEXT: [[TMP106:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP106]]) // CHECK1-NEXT: ret void // // @@ -468,20 +474,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -495,7 +501,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -311,7 +311,7 @@ // CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK1-NEXT: 
[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 @@ -619,15 +619,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -637,52 +639,54 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 33, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -761,27 +765,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 
// CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -793,52 +797,58 @@ // CHECK1-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] 
to i64 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -846,19 +856,19 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK1-NEXT: store i64 [[TMP16]], ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[K]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -870,33 +880,32 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], 
align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -911,95 +920,103 @@ // CHECK1-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// 
CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK1-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK1-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK1-NEXT: 
[[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK1-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -1010,27 +1027,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1040,63 +1057,69 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // 
CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1117,8 +1140,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1137,31 +1159,40 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1171,125 +1202,131 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] 
to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 
1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK1-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1648,7 +1685,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1658,23 +1695,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1684,75 +1726,79 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr 
[[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1767,45 +1813,53 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -1815,30 +1869,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1848,68 +1903,74 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i64 
0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 
2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 11, ptr [[I]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1942,7 +2003,7 @@ // CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 @@ -2246,15 +2307,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // 
CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2264,52 +2327,54 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 33, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2388,24 +2453,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -2414,76 +2481,80 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: 
call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP27]] +// CHECK3-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK3-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 1, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK3-NEXT: store i64 [[TMP17]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[K]], align 8 +// CHECK3-NEXT: store i64 [[TMP21]], ptr [[TMP4]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2495,33 +2566,32 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2536,95 +2606,103 @@ // CHECK3-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK3-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 
8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK3-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK3-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK3-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK3-NEXT: 
[[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2635,27 +2713,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// 
CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2665,63 +2743,69 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK3-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // 
CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2742,8 +2826,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2762,31 +2845,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2796,125 +2888,131 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK3-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] 
to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 
1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK3-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3273,7 +3371,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3283,23 +3381,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3309,75 +3412,79 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr 
[[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK3-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3392,45 +3499,53 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: 
[[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -3440,30 +3555,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3473,68 +3589,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i64 
0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK3-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 
2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK3-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 11, ptr [[I]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3567,7 +3689,7 @@ // CHECK5-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK5-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK5-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 @@ -3875,15 +3997,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // 
CHECK5-SAME: () #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3893,52 +4017,54 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK5-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 33, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4017,27 +4143,27 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// 
CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -4049,52 +4175,58 @@ // CHECK5-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK5-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr 
@[[GLOB2]], i32 [[TMP2]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK5-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK5-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -4102,19 +4234,19 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 1, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: .omp.linear.pu: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK5-NEXT: store i64 [[TMP16]], ptr [[K_ADDR]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK5-NEXT: store i64 [[TMP21]], ptr [[K]], align 8 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: .omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -4126,33 +4258,32 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK5-NEXT: store i32 
[[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -4167,95 +4298,103 @@ // CHECK5-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK5-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK5-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK5-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, 
!llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK5-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK5-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK5-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK5-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK5-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK5-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK5-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK5-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: .omp.linear.pu: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK5-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK5-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: .omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -4266,27 +4405,27 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4296,63 +4435,69 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK5-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK5-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK5-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4373,8 +4518,7 @@ // CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4393,31 +4537,40 @@ // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK5-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4427,125 +4580,131 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK5-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] 
to double +// CHECK5-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK5-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK5-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK5-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK5-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK5-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK5-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK5-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK5-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK5-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK5-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK5-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK5-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK5-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK5-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 
1.000000e+00 +// CHECK5-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK5-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK5-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK5-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK5-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK5-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK5-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK5-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK5-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK5-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK5-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4926,8 +5085,7 @@ // CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -4941,25 +5099,33 @@ // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = 
load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK5-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK5-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) #[[ATTR4]] +// CHECK5-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -4967,16 +5133,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4986,136 +5149,144 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK5-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ 
[[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store double [[ADD]], ptr [[A]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[A4]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// 
CHECK5-NEXT: store double [[INC]], ptr [[A3]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK5-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK5-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK5-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK5: cond.true9: -// CHECK5-NEXT: br label [[COND_END11:%.*]] -// CHECK5: cond.false10: -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: br label [[COND_END11]] -// CHECK5: cond.end11: -// CHECK5-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK5-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK5: omp.inner.for.cond13: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK5-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK5: omp.inner.for.body15: -// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK5-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK5-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK5-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK5-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD19]], ptr [[A20]], align 8 -// CHECK5-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], 
i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = load double, ptr [[A21]], align 8 -// CHECK5-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC22]], ptr [[A21]], align 8 -// CHECK5-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] to i16 -// CHECK5-NEXT: [[TMP27:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP27]] -// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i64 1 -// CHECK5-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK5: omp.body.continue26: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK5-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK5: omp.inner.for.end29: +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK5: cond.true8: +// CHECK5-NEXT: br label [[COND_END10:%.*]] +// CHECK5: cond.false9: +// CHECK5-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: br label [[COND_END10]] +// CHECK5: cond.end10: +// CHECK5-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK5-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK5: omp.inner.for.cond12: +// CHECK5-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK5-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK5: omp.inner.for.body14: +// CHECK5-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK5-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK5-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK5-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK5-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD18]], ptr [[A19]], align 8 +// CHECK5-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 8 +// CHECK5-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC21]], ptr [[A20]], align 8 +// CHECK5-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK5-NEXT: 
[[TMP36:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP36]] +// CHECK5-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX23]], i64 1 +// CHECK5-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK5: omp.body.continue25: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK5: omp.inner.for.inc26: +// CHECK5-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK5-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK5: omp.inner.for.end28: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK5-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK5-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5130,45 +5301,53 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK5-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK5-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load 
i8, ptr [[TMP5]], align 2 +// CHECK5-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: ret void // // @@ -5178,30 +5357,31 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -5211,68 +5391,74 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: 
[[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK5-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK5-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 11, ptr [[I]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5305,7 +5491,7 @@ // CHECK7-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK7-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 -// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK7-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[LIN:%.*]] = alloca i32, align 4 @@ -5609,15 +5795,17 @@ // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK7-SAME: () #[[ATTR2:[0-9]+]] { // CHECK7-NEXT: entry: -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5627,52 +5815,54 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK7-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 33, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5751,24 +5941,26 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -5777,76 +5969,80 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK7-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK7-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK7-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK7-NEXT: 
call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK7-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK7-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK7-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK7-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK7-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 1, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK7-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK7: .omp.linear.pu: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK7-NEXT: store i64 [[TMP17]], ptr [[TMP0]], align 8 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[K]], align 8 +// CHECK7-NEXT: store i64 [[TMP21]], ptr [[TMP4]], align 8 // CHECK7-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK7: .omp.linear.pu.done: // CHECK7-NEXT: ret void @@ -5858,33 +6054,32 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -5899,95 +6094,103 @@ // CHECK7-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK7-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK7-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, 
ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK7-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK7-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK7-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK7-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK7-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK7-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK7-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 
1 // CHECK7-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK7-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK7-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK7-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK7: .omp.linear.pu: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK7-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK7-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK7-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK7-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK7: .omp.linear.pu.done: // CHECK7-NEXT: ret void @@ -5998,27 +6201,27 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], 
align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6028,63 +6231,69 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 -// CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 +// CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK7-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 
[[TMP14]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK7-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK7-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6105,8 +6314,7 @@ // CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -6125,31 +6333,40 @@ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK7-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK7-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6159,125 +6376,131 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: 
[[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK7-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK7-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] 
to double +// CHECK7-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK7-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK7-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK7-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK7-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK7-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK7-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK7-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK7-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK7-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK7-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK7-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK7-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK7-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK7-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK7-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 
1.000000e+00 +// CHECK7-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK7-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK7-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK7-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK7-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK7-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK7-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK7-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK7-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK7-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK7-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK7-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK7-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK7-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6658,8 +6881,7 @@ // CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -6673,25 +6895,33 @@ // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = 
load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK7-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..10, ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK7-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) #[[ATTR4]] +// CHECK7-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: @@ -6699,16 +6929,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6718,136 +6945,144 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK7-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ 
[[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK7-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: store double [[ADD]], ptr [[A]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], ptr [[A4]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// 
CHECK7-NEXT: store double [[INC]], ptr [[A3]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK7-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK7-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK7-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK7-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK7: cond.true9: -// CHECK7-NEXT: br label [[COND_END11:%.*]] -// CHECK7: cond.false10: -// CHECK7-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: br label [[COND_END11]] -// CHECK7: cond.end11: -// CHECK7-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK7-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK7: omp.inner.for.cond13: -// CHECK7-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK7-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK7: omp.inner.for.body15: -// CHECK7-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK7-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK7-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK7-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK7-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD19]], ptr [[A20]], align 4 -// CHECK7-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], 
i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = load double, ptr [[A21]], align 4 -// CHECK7-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC22]], ptr [[A21]], align 4 -// CHECK7-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] to i16 -// CHECK7-NEXT: [[TMP27:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP27]] -// CHECK7-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i32 1 -// CHECK7-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK7: omp.body.continue26: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK7-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP43:![0-9]+]] -// CHECK7: omp.inner.for.end29: +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK7-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK7: cond.true8: +// CHECK7-NEXT: br label [[COND_END10:%.*]] +// CHECK7: cond.false9: +// CHECK7-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: br label [[COND_END10]] +// CHECK7: cond.end10: +// CHECK7-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK7-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK7: omp.inner.for.cond12: +// CHECK7-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK7-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK7: omp.inner.for.body14: +// CHECK7-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK7-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK7-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK7-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK7-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD18]], ptr [[A19]], align 4 +// CHECK7-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 4 +// CHECK7-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC21]], ptr [[A20]], align 4 +// CHECK7-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK7-NEXT: 
[[TMP36:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP36]] +// CHECK7-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX23]], i32 1 +// CHECK7-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK7: omp.body.continue25: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK7: omp.inner.for.inc26: +// CHECK7-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK7-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK7: omp.inner.for.end28: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK7-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK7-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6862,45 +7097,53 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK7-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK7-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load 
i8, ptr [[TMP5]], align 2 +// CHECK7-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK7-NEXT: ret void // // @@ -6910,30 +7153,31 @@ // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6943,68 +7187,74 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: 
[[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK7-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK7-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK7-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 11, ptr [[I]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9026,15 +9276,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9044,52 +9296,54 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 33, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9103,33 +9357,32 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: 
[[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -9144,95 +9397,103 @@ // CHECK17-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// 
CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK17-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK17-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK17-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK17-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK17-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK17-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK17-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK17-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK17-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK17-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -9249,27 +9510,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9279,63 +9540,69 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] +// 
CHECK17-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9356,8 +9623,7 @@ // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -9376,31 +9642,40 @@ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK17-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9410,125 +9685,131 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// 
CHECK17-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK17-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK17-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK17-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK17-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK17-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK17-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK17-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr 
[[ARRAYIDX10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK17-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK17-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK17-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK17-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: 
[[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK17-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK17-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9543,45 +9824,53 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK17-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: ret void // // @@ -9593,7 +9882,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -9603,23 +9892,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -9629,75 +9923,79 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK17-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK17-NEXT: store double [[ADD]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK17-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// 
CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK17-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9711,30 +10009,31 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -9744,68 +10043,74 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr 
[[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i16 
[[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK17-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 11, ptr [[I]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9816,15 +10121,17 @@ // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK19-SAME: () #[[ATTR0:[0-9]+]] { // CHECK19-NEXT: entry: -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9834,52 +10141,54 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 33, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9893,33 +10202,32 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -9934,95 +10242,103 @@ // CHECK19-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// 
CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK19-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK19-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK19-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK19-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK19-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK19-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK19-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK19-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK19-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK19-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK19-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK19-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK19-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK19-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -10039,27 +10355,27 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10069,63 +10385,69 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK19-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// 
CHECK19-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10146,8 +10468,7 @@ // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -10166,31 +10487,40 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10200,125 +10530,131 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK19-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// 
CHECK19-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK19-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK19-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr 
[[ARRAYIDX10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK19-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK19-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK19-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK19-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK19-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK19-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK19-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK19-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: 
[[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK19-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK19-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK19-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10333,45 +10669,53 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK19-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: ret void // // @@ -10383,7 +10727,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -10393,23 +10737,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// 
CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -10419,75 +10768,79 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// 
CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK19-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK19-NEXT: store double [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP27]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// 
CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK19-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10501,30 +10854,31 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -10534,68 +10888,74 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr 
[[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK19-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: store i16 
[[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK19-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 11, ptr [[I]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10606,15 +10966,17 @@ // CHECK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK21-SAME: () #[[ATTR0:[0-9]+]] { // CHECK21-NEXT: entry: -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10624,52 +10986,54 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK21-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK21-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK21-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 33, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10683,33 +11047,32 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: 
[[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -10724,95 +11087,103 @@ // CHECK21-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// 
CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK21-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK21-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK21-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK21-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK21-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK21-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK21-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK21-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK21-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK21-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK21-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK21-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK21-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK21-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK21-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK21-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK21: .omp.linear.pu: -// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK21-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK21-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK21-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK21-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK21-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK21: .omp.linear.pu.done: // CHECK21-NEXT: ret void @@ -10829,27 +11200,27 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10859,63 +11230,69 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK21-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK21-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK21-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK21-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] +// 
CHECK21-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK21-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK21-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK21-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10936,8 +11313,7 @@ // CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10956,31 +11332,40 @@ // CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK21-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK21-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK21-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10990,125 +11375,131 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK21-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK21-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK21-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK21-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK21-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK21-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// 
CHECK21-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK21-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK21-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK21-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK21-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK21-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK21-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK21-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK21-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK21-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK21-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK21-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK21-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK21-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK21-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK21-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK21-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK21-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK21-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr 
[[ARRAYIDX10]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK21-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK21-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK21-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK21-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK21-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK21-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK21-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK21-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: 
[[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK21-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK21-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK21-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK21-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11123,45 +11514,53 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK21-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK21-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK21-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: ret void // // @@ -11174,8 +11573,7 @@ // CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK21-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -11189,25 +11587,33 @@ // CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK21-NEXT: [[TMP3:%.*]] = 
load i64, ptr [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK21-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: // CHECK21-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK21-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK21-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK21-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -11215,16 +11621,13 @@ // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -11234,136 +11637,144 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// 
CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr 
[[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK21-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK21-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK21-NEXT: store double [[ADD]], ptr [[A]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], ptr [[A4]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC]], ptr [[A3]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK21-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// 
CHECK21-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP26]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK21-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK21-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK21-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK21: cond.true9: -// CHECK21-NEXT: br label [[COND_END11:%.*]] -// CHECK21: cond.false10: -// CHECK21-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: br label [[COND_END11]] -// CHECK21: cond.end11: -// CHECK21-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK21-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK21: omp.inner.for.cond13: -// CHECK21-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK21-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK21: omp.inner.for.body15: -// CHECK21-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK21-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK21-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK21-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK21-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD19]], ptr [[A20]], align 8 -// CHECK21-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP26:%.*]] = load double, ptr [[A21]], align 8 -// CHECK21-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC22]], ptr [[A21]], align 8 -// CHECK21-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] 
to i16 -// CHECK21-NEXT: [[TMP27:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP27]] -// CHECK21-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i64 1 -// CHECK21-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK21: omp.body.continue26: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK21: omp.inner.for.inc27: -// CHECK21-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK21-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]] -// CHECK21: omp.inner.for.end29: +// CHECK21-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK21-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK21: cond.true8: +// CHECK21-NEXT: br label [[COND_END10:%.*]] +// CHECK21: cond.false9: +// CHECK21-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: br label [[COND_END10]] +// CHECK21: cond.end10: +// CHECK21-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK21-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK21: omp.inner.for.cond12: +// CHECK21-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK21-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK21: omp.inner.for.body14: +// CHECK21-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK21-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK21-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK21-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK21-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK21-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD18]], ptr [[A19]], align 8 +// CHECK21-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 8 +// CHECK21-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC21]], ptr [[A20]], align 8 +// CHECK21-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK21-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP36]] +// CHECK21-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX23]], 
i64 1 +// CHECK21-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK21: omp.body.continue25: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK21: omp.inner.for.inc26: +// CHECK21-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK21-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK21: omp.inner.for.end28: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK21-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK21-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11377,30 +11788,31 @@ // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -11410,68 +11822,74 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK21-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group 
[[ACC_GRP32]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK21-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK21-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 11, ptr [[I]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11482,15 +11900,17 @@ // CHECK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK23-SAME: () #[[ATTR0:[0-9]+]] { // CHECK23-NEXT: entry: -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11500,52 +11920,54 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK23-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK23-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK23-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 33, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11559,33 +11981,32 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -11600,95 +12021,103 @@ // CHECK23-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// 
CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK23-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK23-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK23-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK23-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK23-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK23-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK23-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK23-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK23-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK23-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK23-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK23-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK23-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK23-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK23-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK23-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK23-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK23-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK23-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK23-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK23: .omp.linear.pu: -// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK23-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK23-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK23-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK23-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK23-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK23: .omp.linear.pu.done: // CHECK23-NEXT: ret void @@ -11705,27 +12134,27 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11735,63 +12164,69 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK23-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK23-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// 
CHECK23-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK23-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK23-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK23-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11812,8 +12247,7 @@ // CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -11832,31 +12266,40 @@ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK23-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK23-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11866,125 +12309,131 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK23-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK23-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK23-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK23-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK23-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// 
CHECK23-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK23-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK23-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK23-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK23-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK23-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK23-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK23-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK23-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK23-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK23-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK23-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK23-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK23-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK23-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK23-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK23-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK23-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK23-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK23-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr 
[[ARRAYIDX10]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK23-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK23-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK23-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK23-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK23-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK23-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK23-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK23-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK23-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: 
[[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK23-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK23-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK23-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK23-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11999,45 +12448,53 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK23-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK23-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK23-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: ret void // // @@ -12050,8 +12507,7 @@ // CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK23-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -12065,25 +12521,33 @@ // CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK23-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[VLA_ADDR2]], align 4 // CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK23-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: // CHECK23-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK23-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK23-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK23-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -12091,16 +12555,13 @@ // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -12110,136 +12571,144 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// 
CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: -// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, ptr 
[[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK23-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK23-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK23-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK23-NEXT: store double [[ADD]], ptr [[A]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], ptr [[A4]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC]], ptr [[A3]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK23-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// 
CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP27]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK23-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK23-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK23-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK23: cond.true9: -// CHECK23-NEXT: br label [[COND_END11:%.*]] -// CHECK23: cond.false10: -// CHECK23-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: br label [[COND_END11]] -// CHECK23: cond.end11: -// CHECK23-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK23-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK23: omp.inner.for.cond13: -// CHECK23-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK23-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK23: omp.inner.for.body15: -// CHECK23-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK23-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK23-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK23-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK23-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD19]], ptr [[A20]], align 4 -// CHECK23-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP26:%.*]] = load double, ptr [[A21]], align 4 -// CHECK23-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC22]], ptr [[A21]], align 4 -// CHECK23-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] 
to i16 -// CHECK23-NEXT: [[TMP27:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP27]] -// CHECK23-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i32 1 -// CHECK23-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK23: omp.body.continue26: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK23: omp.inner.for.inc27: -// CHECK23-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK23-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP31:![0-9]+]] -// CHECK23: omp.inner.for.end29: +// CHECK23-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK23-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK23: cond.true8: +// CHECK23-NEXT: br label [[COND_END10:%.*]] +// CHECK23: cond.false9: +// CHECK23-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: br label [[COND_END10]] +// CHECK23: cond.end10: +// CHECK23-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK23-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK23: omp.inner.for.cond12: +// CHECK23-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK23-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK23: omp.inner.for.body14: +// CHECK23-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK23-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK23-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK23-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK23-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK23-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD18]], ptr [[A19]], align 4 +// CHECK23-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 4 +// CHECK23-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC21]], ptr [[A20]], align 4 +// CHECK23-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK23-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP36]] +// CHECK23-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX23]], 
i32 1 +// CHECK23-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK23: omp.body.continue25: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK23: omp.inner.for.inc26: +// CHECK23-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK23-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK23: omp.inner.for.end28: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK23-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK23-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12253,30 +12722,31 @@ // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -12286,68 +12756,74 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK23-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK23-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group 
[[ACC_GRP33]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK23-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK23-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 11, ptr [[I]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -628,7 +628,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) 
@@ -636,20 +636,22 @@ // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -657,21 +659,25 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -681,23 +687,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -705,16 +714,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -723,6 +734,7 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -731,13 +743,13 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -745,29 +757,36 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -775,34 +794,39 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -811,36 +835,42 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1297,7 +1327,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1305,20 +1335,22 @@ // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to 
i1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1326,21 +1358,25 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: 
[[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1350,23 +1386,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1374,16 +1413,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1392,6 +1433,7 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1400,13 +1442,13 @@ // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1414,29 +1456,36 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1444,34 +1493,39 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, 
ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1480,36 +1534,42 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1524,6 +1584,7 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -1532,13 +1593,13 @@ // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK9-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1546,29 +1607,36 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK9-SAME: () #[[ATTR0]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1578,7 +1646,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1586,20 +1654,22 @@ // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1607,21 +1677,25 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1631,23 +1705,26 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) 
// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1655,16 +1732,18 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1673,34 +1752,39 @@ // CHECK9-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1709,36 +1793,42 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], 
align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1746,6 +1836,7 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // 
CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -1754,13 +1845,13 @@ // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK11-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1768,29 +1859,36 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK11-SAME: () #[[ATTR0]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1800,7 +1898,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1808,20 +1906,22 @@ // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1829,21 +1929,25 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1853,23 +1957,26 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = 
call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1877,16 +1984,18 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1895,34 +2004,39 @@ // CHECK11-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, 
align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // // @@ -1931,35 +2045,41 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr 
[[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// 
CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4
// CHECK11-NEXT: ret void
//
diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
--- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp
@@ -598,7 +598,7 @@
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
@@ -606,29 +606,35 @@
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP4]])
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -637,25 +643,30 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -664,21 +675,25 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -686,40 +701,48 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -729,8 +752,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -738,32 +760,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1183,7 +1212,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1191,29 +1220,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1222,25 +1257,30 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1249,21 +1289,25 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1271,40 +1315,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1314,8 +1366,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1323,32 +1374,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1363,21 +1421,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1385,21 +1447,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1409,7 +1475,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1417,29 +1483,35 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr 
[[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1448,25 +1520,30 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void 
@__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1474,19 +1551,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR0]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1496,8 +1577,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1505,32 +1585,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1538,21 +1625,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
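The CHECK3/CHECK9/CHECK11 updates in the hunks above and below all verify the same change at the IR level: instead of forwarding each captured scalar as its own trailing argument of `__kmpc_fork_call`, the call site now materializes one anonymous aggregate (`%struct.anon.*`, the `[[OMP_OUTLINED_ARG_AGG_]]` alloca), stores the captured values into its fields, and passes only its address; the outlined function receives that address as `[[__CONTEXT]]` and unpacks the fields into locals. The following is a minimal, hypothetical C++ sketch of that convention; `Anon`, `outlined`, and `fork_call_like` are illustrative stand-ins, not real libomp entry points or actual compiler output.

#include <cstdio>

namespace {

// Mirrors the { i32 a, i16 b } layout of [[STRUCT_ANON_4]] built at the
// call sites above.
struct Anon {
  int a;
  short b;
};

// Stand-in for the outlined parallel region: it now takes a single context
// pointer instead of one trailing parameter per captured variable.
void outlined(int * /*global_tid*/, int * /*bound_tid*/, void *context) {
  Anon *ctx = static_cast<Anon *>(context);
  int a = ctx->a;   // field 0, like the load through [[TMP1]]/[[TMP2]]
  short b = ctx->b; // field 1, like the load through [[TMP3]]/[[TMP4]]
  a += static_cast<int>(b);
  std::printf("a + b = %d\n", a);
}

// Stand-in for __kmpc_fork_call: the variadic part now always carries one
// pointer, so the checks hard-code the argument count to 1.
void fork_call_like(int nargs, void (*fn)(int *, int *, void *), void *arg) {
  (void)nargs;
  int gtid = 0, btid = 0;
  fn(&gtid, &btid, arg);
}

} // namespace

int main() {
  int a = 40;
  short b = 2;
  Anon agg{a, b};                    // pack the captures, as the call site does
  fork_call_like(1, outlined, &agg); // pass only the aggregate's address
  return 0;
}

Under this sketch the variadic part of the fork call always carries exactly one pointer, which is why every updated `@__kmpc_fork_call` check now shows `i32 1` regardless of how many variables the region captures.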
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1560,21 +1651,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1584,7 +1679,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1592,29 +1687,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 
4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1623,25 +1724,30 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // 
CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1649,19 +1755,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR0]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1671,8 +1781,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1680,31 +1789,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -52,230 +52,236 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr 
[[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP56]], i32 1, i64 48, i64 24, ptr 
@.omp_task_entry.) -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP57]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP58]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP59]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP60]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP57]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP61]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP63]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP65]], ptr [[TMP57]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 +// 
CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP59]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
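Aside from the renumbered `[[TMP*]]` values, the reduction bookkeeping in this hunk is unchanged: the outlined function still fills a two-element array of task-reduction descriptors (one for the scalar `argc`, one for the VLA-backed byte range) and hands it to `__kmpc_taskred_modifier_init`. Below is a rough C++ mirror of that setup, assuming the descriptor layout matches the seven fields the checks store (shared item, original item, size, init, fini, combiner, flags); the struct and callback signatures are illustrative assumptions, not the real libomp declarations.

#include <cstddef>
#include <cstring>

// Assumed mirror of the descriptor the checks populate field by field
// (indices 0..6 of [[STRUCT_KMP_TASKRED_INPUT_T]]).
struct TaskredInputSketch {
  void *shar;                   // reduction item used inside the task
  void *orig;                   // original list item
  size_t size;                  // size of the item in bytes
  void (*init)(void *);         // initializer, e.g. @.red_init.
  void (*fini)(void *);         // finalizer, null in this test
  void (*comb)(void *, void *); // combiner, e.g. @.red_comb.
  unsigned flags;               // 0 for argc, 1 for the VLA item
};

static void int_init(void *item) { *static_cast<int *>(item) = 0; }
static void int_comb(void *lhs, void *rhs) {
  *static_cast<int *>(lhs) += *static_cast<int *>(rhs);
}
static void byte_init(void *item) { *static_cast<char *>(item) = 0; }
static void byte_comb(void *lhs, void *rhs) {
  *static_cast<char *>(lhs) += *static_cast<char *>(rhs);
}

// Fills the two descriptors the way the CHECK1 lines above do: one for the
// scalar argc reduction, one for the VLA-backed byte range.
void build_taskred_inputs(int *argc_priv, int *argc_orig, char *vla,
                          char *range_orig, size_t range_size,
                          TaskredInputSketch out[2]) {
  out[0] = {argc_priv, argc_orig, sizeof(int), int_init, nullptr, int_comb, 0};
  out[1] = {vla, range_orig, range_size, byte_init, nullptr, byte_comb, 1};
}

int main() {
  int argc_priv = 0, argc_orig = 5;
  char buf[8] = {0};
  TaskredInputSketch in[2];
  build_taskred_inputs(&argc_priv, &argc_orig, buf, buf, sizeof(buf), in);
  return 0;
}

The only substantive difference in this function is where `argc` and `argv` now come from: they are read out of the `[[__CONTEXT]]` aggregate (`[[TMP2]]` and `[[TMP3]]`) instead of their own parameter allocas, which is what shifts every later temporary index by three.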
+// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP60]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP61]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP63]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP60]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP64]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 // CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP68]], i32 0) -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP69]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP70]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP72:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP72]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -// CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP74]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP75]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP69:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP68]], ptr [[TMP60]]) +// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP71]], i32 0) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP78]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // 
CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP76]], [[TMP77]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP78]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP79]], [[TMP80]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP81]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP79:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP79]] to i32 -// CHECK1-NEXT: [[TMP80:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP80]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP82]] to i32 +// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP83]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP78]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP74]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP81]] +// 
CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP81]] monotonic, align 4 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP83]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP84]] monotonic, align 4 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP86]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP87]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP85:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP90:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP85]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP86]] to i32 -// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP87]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP89:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP85]], i8 [[TMP88]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP90]] = extractvalue { i8, i1 } [[TMP89]], 0 -// CHECK1-NEXT: [[TMP91:%.*]] = extractvalue { i8, 
i1 } [[TMP89]], 1 -// CHECK1-NEXT: br i1 [[TMP91]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP88:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP93:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP88]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP89:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP89]] to i32 +// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP90]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP92:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP88]], i8 [[TMP91]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP93]] = extractvalue { i8, i1 } [[TMP92]], 0 +// CHECK1-NEXT: [[TMP94:%.*]] = extractvalue { i8, i1 } [[TMP92]], 1 +// CHECK1-NEXT: br i1 [[TMP94]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP83]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP86]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP92]]) +// CHECK1-NEXT: [[TMP95:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP95]]) // CHECK1-NEXT: ret void // // @@ -414,20 +420,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -441,7 +447,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -322,7 +322,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8 @@ -400,13 +400,13 @@ // CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -746,7 +746,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -754,22 +754,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -848,8 +853,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 @@ -902,27 +907,32 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -930,29 +940,34 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -961,39 +976,45 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -1009,7 +1030,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1027,82 +1048,95 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load 
i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK1-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// 
CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK1-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK1-NEXT: ret void // // @@ -1110,41 +1144,49 @@ // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i64 [[TMP1]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK1-NEXT: ret void // // @@ -1152,38 +1194,47 @@ // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[NN_ADDR]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[NN]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: ret void // // @@ -1248,23 +1299,28 @@ // CHECK1-SAME: (i64 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, i64 [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 // CHECK1-NEXT: ret void // // @@ -1619,7 +1675,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1629,48 +1685,57 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..23, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP6]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK1-NEXT: ret void // // @@ -1681,59 +1746,67 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, 
ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..26, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1744,47 +1817,54 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..29, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr 
[[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1816,7 +1896,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -1892,13 +1972,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2240,7 +2320,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2248,22 +2328,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2342,8 +2427,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 @@ -2396,27 +2481,32 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -2424,29 +2514,34 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2455,39 +2550,45 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2503,7 +2604,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2521,82 +2622,95 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK3-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK3-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK3-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK3-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// 
CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK3-NEXT: store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK3-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK3-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK3-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK3-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -2604,41 +2718,49 @@ // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i32 [[TMP1]]) +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK3-NEXT: ret void // // @@ -2646,38 +2768,47 @@ // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[NN_ADDR]]) +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[NN]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: ret void // // @@ -2741,23 +2872,28 @@ // CHECK3-SAME: (i32 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, i32 [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[F:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 // CHECK3-NEXT: ret void // // @@ -3112,7 +3248,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3122,48 +3258,57 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..23, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP6]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK3-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK3-NEXT: ret void // // @@ -3174,59 +3319,67 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..26, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -3237,47 +3390,54 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..29, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr 
[[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -3295,7 +3455,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3303,22 +3463,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3326,29 +3491,34 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3357,39 +3527,45 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3405,7 +3581,7 @@ // CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -3423,82 +3599,95 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load 
i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK9-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK9-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK9-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK9-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// 
CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK9-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK9-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK9-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK9-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK9-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK9-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK9-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK9-NEXT: ret void // // @@ -3506,41 +3695,49 @@ // CHECK9-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i64 [[TMP1]]) +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK9-NEXT: ret void // // @@ -3548,38 +3745,47 @@ // CHECK9-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[NN_ADDR]]) +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[NN]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: ret void // // @@ -3587,23 +3793,28 @@ // CHECK9-SAME: (i64 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, i64 [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 // CHECK9-NEXT: ret void // // @@ -3614,59 +3825,67 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..9, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 
+// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -3679,7 +3898,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -3689,48 +3908,57 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP6]] -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK9-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK9-NEXT: ret void // // @@ -3740,47 +3968,54 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..11, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr 
[[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -3791,7 +4026,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -3799,22 +4034,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3822,29 +4062,34 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3853,39 +4098,45 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3901,7 +4152,7 @@ // CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3919,82 +4170,95 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr 
[[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK11-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK11-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK11-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK11-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: 
[[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK11-NEXT: store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK11-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK11-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK11-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK11-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK11-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK11-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK11-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK11-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK11-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK11-NEXT: ret void // // @@ -4002,41 +4266,49 @@ // CHECK11-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i32 [[TMP1]]) +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK11-NEXT: ret void // // @@ -4044,38 +4316,47 @@ // CHECK11-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[NN_ADDR]]) +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[NN]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: ret void // // @@ -4083,23 +4364,28 @@ // CHECK11-SAME: (i32 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, i32 [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[F:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 // CHECK11-NEXT: ret void // // @@ -4110,59 +4396,67 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..9, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: 
[[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // @@ -4175,7 +4469,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4185,48 +4479,57 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP6]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK11-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK11-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK11-NEXT: ret void // // @@ -4236,47 +4539,54 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..11, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 
[[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -314,7 +314,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8 @@ -386,13 +386,13 @@ // CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -654,7 +654,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -662,19 +662,20 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -684,50 +685,54 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -806,8 +811,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !21 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 @@ -860,21 +865,22 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -884,53 +890,57 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -938,21 +948,22 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -962,55 +973,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -1019,27 +1034,27 @@ // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1049,59 +1064,65 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// 
CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1118,8 +1139,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1138,31 +1158,40 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1172,121 +1201,127 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] 
= phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to 
double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 
1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK1-NEXT: ret void // // @@ -1672,7 +1707,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1682,23 +1717,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1708,72 +1748,76 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD4]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A5]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD2]], ptr [[A]], align 8 +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1785,42 +1829,41 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: 
[[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1834,98 +1877,108 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr 
[[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1937,30 +1990,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1970,65 +2024,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store 
i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// 
CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -2059,7 +2119,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -2129,13 +2189,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2399,7 +2459,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2407,19 +2467,20 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2429,50 +2490,54 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: 
omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2551,8 +2616,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !22 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 @@ -2605,21 +2670,22 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2629,53 +2695,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2683,21 +2753,22 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2707,55 +2778,59 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 
[ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2764,27 +2839,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2794,59 +2869,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2863,8 +2944,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2883,31 +2963,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2917,121 +3006,127 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], 
align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br 
label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group 
[[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// 
CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], 
align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK3-NEXT: ret void // // @@ -3417,7 +3512,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3427,23 +3522,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3453,72 +3553,76 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A5]], align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -3530,42 +3634,41 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3579,98 +3682,108 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr 
[[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK3-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK3-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3682,30 +3795,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3715,65 +3829,71 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store 
i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// 
CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -3790,7 +3910,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3798,19 +3918,20 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3820,50 +3941,54 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -3871,21 +3996,22 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3895,55 +4021,59 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK9-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -3952,27 +4082,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3982,59 +4112,65 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -4051,8 +4187,7 @@ // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4071,31 +4206,40 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4105,121 +4249,127 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// 
CHECK9-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK9-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK9-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK9-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK9-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK9-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK9-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK9-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// 
CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK9-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK9-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK9-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK9-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD22]], 
ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK9-NEXT: ret void // // @@ -4231,42 +4381,41 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4280,98 +4429,108 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK9-NEXT: store i8 
[[TMP8]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK9-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK9-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK9-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK9-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK9-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK9-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK9-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP23:%.*]] = 
load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK9-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -4385,7 +4544,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4395,23 +4554,28 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4421,72 +4585,76 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: store double [[ADD4]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], ptr [[A5]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK9-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: store double [[ADD2]], ptr [[A]], align 8 +// CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A3]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4496,30 +4664,31 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4529,65 +4698,71 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: 
store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 
-// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // @@ -4597,7 +4772,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -4605,19 +4780,20 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4627,50 +4803,54 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // 
CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -4678,21 +4858,22 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4702,55 +4883,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK11-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -4759,27 +4944,27 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4789,59 +4974,65 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: 
br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4858,8 +5049,7 @@ // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4878,31 +5068,40 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 
+// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4912,121 +5111,127 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP24]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr 
[[A]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK11-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK11-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK11-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK11-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK11-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK11-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK11-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group 
[[ACC_GRP14]] +// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK11-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK11-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK11-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK11-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK11-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], 
align 4 +// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK11-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK11-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK11-NEXT: ret void // // @@ -5038,42 +5243,41 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5087,98 +5291,108 @@ // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK11-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK11-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK11-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // 
CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK11-NEXT: [[ADD9:%.*]] = add i32 
[[TMP28]], [[MUL]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK11-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK11-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK11-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK11-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK11-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK11-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -5192,7 +5406,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 
4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -5202,23 +5416,28 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5228,72 +5447,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr 
[[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: 
-// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: store double [[ADD4]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A5]], align 4 -// CHECK11-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK11-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: store double [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], ptr [[A3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// 
CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -5303,30 +5526,31 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5336,64 +5560,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -167,18 +167,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -190,68 +193,70 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: 
omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -335,18 +340,21 @@ // CHECK3-SAME: (ptr noundef 
[[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -358,66 +366,68 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // 
CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -577,8 +587,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -587,157 +596,167 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to 
i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add 
nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -803,18 +822,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -826,67 +848,69 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // 
@@ -1045,8 +1069,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1055,155 +1078,165 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 
[[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 
0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul 
nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw 
i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1269,18 +1302,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1292,65 +1328,67 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git 
a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ -287,18 +287,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -308,56 +311,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -365,18 +370,21 @@ // CHECK1-SAME: (ptr noundef 
[[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -386,56 +394,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br 
label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -443,18 +453,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -464,73 +477,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -706,18 +721,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -727,55 +745,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -783,18 +803,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -804,55 +827,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -860,18 +885,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -881,72 +909,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add 
nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -1244,27 +1274,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1277,75 +1310,79 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1357,27 +1394,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1390,75 +1430,79 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // 
CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: 
store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1471,33 +1515,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1510,94 +1556,100 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1754,18 +1806,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1775,55 +1830,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1831,18 +1888,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1852,55 +1912,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1908,18 +1970,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1929,72 +1994,74 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// 
CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -2294,27 +2361,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2327,74 +2397,78 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] 
= load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2406,27 +2480,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2439,74 +2516,78 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2519,33 +2600,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2558,93 +2641,99 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 
0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// 
CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2801,18 +2890,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2822,54 +2914,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2877,18 +2971,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2898,54 +2995,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2953,18 +3052,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2974,71 +3076,73 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -347,8 +347,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -357,138 +356,148 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: 
call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 
[[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -707,7 +716,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -716,23 +725,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -740,112 +753,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: 
call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1156,8 +1173,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1166,136 +1182,146 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: 
call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1514,7 +1540,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1523,23 +1549,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1547,110 +1577,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], 
align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: 
[[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -1883,35 +1917,33 @@ // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1923,65 +1955,73 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store 
volatile i32 1, ptr [[TMP15]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -160,41 +160,39 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store double [[TMP0]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load volatile double, ptr [[TMP2]], align 8 -// CHECK1-NEXT: store double [[TMP3]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP7]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load 
i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -211,84 +209,94 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr 
[[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 
[[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP23]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP18]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP19]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP22]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP31]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -327,8 +335,7 @@ // CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -336,26 +343,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -363,95 +373,101 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// 
CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store 
i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP23]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP20]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 4 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP24]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP27]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 4 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP31]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -617,8 +633,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -628,27 +643,31 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], 
align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -657,28 +676,34 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = 
alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -688,99 +713,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 
[[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP22]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x 
%struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done10: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = 
phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done12: // CHECK9-NEXT: ret void // // @@ -964,7 +989,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -973,23 +998,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -998,26 +1027,30 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1027,97 +1060,97 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP7]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP20]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label 
[[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP28]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK9-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1325,8 +1358,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1336,27 +1368,31 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1365,28 +1401,34 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: 
[[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1396,97 +1438,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: 
[[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = 
load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP22]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done9: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done11: // CHECK11-NEXT: ret void // // @@ -1670,7 +1712,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1679,23 +1721,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1704,26 +1750,30 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1733,95 +1783,95 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: 
omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -122,7 +122,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -132,20 +132,22 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i64 [[TMP5]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -155,90 +157,106 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -251,72 +269,80 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: 
[[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label 
[[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -324,21 +350,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] @@ -352,27 +378,30 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -382,93 +411,110 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr 
[[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -481,85 +527,93 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// 
CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -764,7 +818,7 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -774,20 +828,22 @@ // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i64 [[TMP5]], ptr [[TMP1]]) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -797,90 +853,106 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -893,72 +965,80 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // 
CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK2: .cancel.exit: // CHECK2-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK2: .cancel.continue: @@ -966,21 +1046,21 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: cancel.exit: -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK2-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK2: omp.precond.end: // CHECK2-NEXT: br label [[CANCEL_CONT]] @@ -994,27 +1074,30 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1024,93 +1107,110 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP5]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1123,85 +1223,93 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr 
[[TMP26]], i64 0 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -1413,7 +1521,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -1423,20 +1531,22 @@ // CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i32 [[TMP5]], ptr [[TMP1]]) +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1446,88 +1556,104 @@ // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]]) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1540,69 +1666,77 @@ // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK4-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK4-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK4-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK4: .cancel.continue: @@ -1610,21 +1744,21 @@ // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK4-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] @@ -1638,27 
+1772,30 @@ // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1668,91 +1805,108 @@ // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1765,82 +1919,90 @@ // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK4-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -1860,7 +2022,7 @@ // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK10-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -1870,20 +2032,22 @@ // CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i64 [[TMP5]], ptr [[TMP1]]) +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1893,90 +2057,106 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK10-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ 
[[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK10-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK10-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK10-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1989,72 +2169,80 @@ // CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK10-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK10-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK10-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK10-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK10-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK10-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK10-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 
[[TMP19]], i32 2) -// CHECK10-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK10-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK10-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK10-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK10: .cancel.continue: @@ -2062,21 +2250,21 @@ // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK10-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK10-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK10-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] @@ -2090,27 +2278,30 @@ // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2120,93 +2311,110 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], 
align 8 +// CHECK10-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ 
[[TMP15]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK10-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK10-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK10-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2219,85 +2427,93 @@ // CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 
[[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK10-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK10-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK10-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// 
CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], 
align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK10-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK10-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void @@ -2310,7 +2526,7 @@ // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -2320,20 +2536,22 @@ // CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK12-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i32 [[TMP5]], ptr [[TMP1]]) +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2343,88 +2561,104 @@ // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp 
slt i32 0, [[TMP7]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]]) +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK12-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2437,69 +2671,77 @@ // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: 
[[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK12-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK12-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK12-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK12-NEXT: [[TMP26:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK12-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK12-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK12: .cancel.exit: // CHECK12-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK12: .cancel.continue: @@ -2507,21 +2749,21 @@ // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK12-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK12-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: cancel.exit: -// CHECK12-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK12-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK12-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK12: omp.precond.end: // CHECK12-NEXT: br label [[CANCEL_CONT]] @@ -2535,27 +2777,30 @@ // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2565,91 +2810,108 @@ // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], 
align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ 
[[TMP15]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2662,82 +2924,90 @@ // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 
[[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK12-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK12-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK12-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK12-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -172,18 +172,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -191,66 +194,81 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -262,76 +280,82 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv 
i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -415,18 +439,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -434,64 +461,79 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -503,72 +545,78 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add 
nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -728,8 +776,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -738,283 +785,316 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], 
align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, 
align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store 
i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 
8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP37]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // 
CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// 
CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// 
CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: 
[[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1080,18 +1160,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1099,66 +1182,81 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1170,75 +1268,81 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 
[[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -1397,8 +1501,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1407,285 +1510,318 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // 
CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 
[[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// 
CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP41]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: 
[[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: 
call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// 
CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 
+// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1751,18 +1887,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1770,64 +1909,79 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1839,71 +1993,77 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 
// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -299,83 +299,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -385,64 +403,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -450,83 +474,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -536,64 +578,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // 
CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -601,103 +649,121 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -707,64 +773,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -940,81 +1012,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1024,61 +1114,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1086,81 +1182,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1170,61 +1284,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 
4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1232,101 +1352,119 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1336,61 +1474,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, 
ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1690,27 +1834,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1720,93 +1867,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1819,83 +1983,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1907,27 +2079,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1937,93 +2112,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 
[[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2036,83 +2228,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2125,33 +2325,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2161,124 +2363,142 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], 
[[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2291,84 +2511,94 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: 
omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2540,83 +2770,101 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // 
CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2626,63 +2874,69 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2690,83 +2944,101 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label 
[[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2776,63 +3048,69 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2841,116 +3119,134 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], 
label [[COND_FALSE6:%.*]] // CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2960,64 +3256,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -3319,27 +3623,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3349,91 +3656,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3446,80 +3770,88 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3531,27 +3863,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3561,91 +3896,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw 
i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3658,80 +4010,88 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3744,33 +4104,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3780,122 +4142,140 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3908,81 +4288,91 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -4154,81 +4544,99 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 
// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4238,60 +4646,66 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], 
[[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4299,81 +4713,99 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4383,60 +4815,66 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4445,114 +4883,132 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label 
[[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4562,61 +5018,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -408,8 +408,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -418,133 +417,158 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -584,137 +608,145 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -722,10 +754,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -737,9 +769,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: 
[[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -808,12 +840,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -898,7 +930,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -907,23 +939,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -931,107 +967,127 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void 
@_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1051,16 +1107,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 
dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1068,120 +1122,128 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 
[[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1201,7 +1263,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1215,7 +1277,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 
-// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1233,9 +1295,9 @@ // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1477,8 +1539,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1487,131 +1548,156 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], 
align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] 
= phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1651,133 +1737,141 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// 
CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: 
omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1785,10 +1879,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1800,9 +1894,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: 
[[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1871,12 +1965,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1961,7 +2055,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1970,23 +2064,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1994,105 +2092,125 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void 
@_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2112,16 +2230,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 
dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2129,116 +2245,124 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr 
noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2258,7 +2382,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2272,7 +2396,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 
[[TMP0]], [[TMP1]] @@ -2290,9 +2414,9 @@ // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2456,35 +2580,33 @@ // CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2492,82 +2614,98 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4 +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2576,76 +2714,88 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi 
i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP21]], align 4 +// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8 // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK5-NEXT: ret void // // @@ -2673,8 +2823,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -2683,133 +2832,158 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done3: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done1: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: br 
i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK13-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK13-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done8: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // @@ -2849,137 +3023,145 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef 
[[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 
[[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef 
nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK13-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // 
CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done13: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done10: // CHECK13-NEXT: ret void // // @@ -3001,7 +3183,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -3010,23 +3192,27 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3034,107 +3220,127 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = 
alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]) +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK13-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done10: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // @@ -3154,16 +3360,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], 
ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3171,120 +3375,128 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) 
[[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done5: -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done3: +// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// 
CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK13-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK13-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done14: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done11: // CHECK13-NEXT: ret void // // @@ -3370,9 +3582,9 @@ // CHECK13-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK13-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[S_ADDR]], align 8 -// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -3389,8 +3601,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -3399,131 +3610,156 @@ // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: br 
i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK15-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done8: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // @@ -3563,133 +3799,141 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef 
[[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 
[[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void 
@_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done11: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done8: // CHECK15-NEXT: ret void // // @@ -3711,7 +3955,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -3720,23 +3964,27 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 
-// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3744,105 +3992,125 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] 
= alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 
// CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK15-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done10: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // @@ -3862,16 +4130,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], 
ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3879,116 +4145,124 @@ // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done12: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done9: // CHECK15-NEXT: ret void // // @@ -4074,9 +4348,9 @@ // CHECK15-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK15-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK15-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 
0 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -4092,35 +4366,33 @@ // CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4128,82 +4400,98 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4215,72 +4503,84 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP21]], align 4 +// CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP22]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: 
omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -184,77 +184,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -264,139 +280,161 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add 
nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] 
-// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -406,43 +444,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = 
trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -450,14 +494,14 @@ // 
CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -580,77 +624,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -660,43 +720,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -704,96 +770,112 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// 
CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -803,43 +885,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc 
i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -847,14 +935,14 @@ // CHECK1: 
omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -862,100 +950,119 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -965,43 +1072,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -1009,14 +1122,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1128,77 +1241,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1208,43 +1337,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, 
ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1252,96 +1387,112 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add 
nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) 
#[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1351,43 +1502,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1395,14 +1552,14 
@@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1410,100 +1567,119 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1513,43 +1689,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1557,14 +1739,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -195,41 +195,39 @@ // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP1]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr 
[[G_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -237,110 +235,128 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca 
i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, ptr [[TMP12]], align 8 -// CHECK1-NEXT: store double [[TMP13]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP17]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store double [[TMP19]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load volatile double, ptr [[TMP26]], align 8 +// CHECK1-NEXT: store double [[TMP27]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP31]], ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store double [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP26]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP29]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP38]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, ptr [[TMP39]], align 8 +// CHECK1-NEXT: store volatile double [[TMP40]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP41]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP42]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// 
CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -354,94 +370,108 @@ // CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP29]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: 
.omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP24]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G3]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR6]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR7]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -480,8 +510,7 @@ // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 @@ -489,26 +518,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -516,102 +548,120 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 
4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP15]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP10]], i32 [[TMP11]], ptr [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], ptr [[G2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP21]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 4 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP25]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP34]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[TMP35]], align 4 +// CHECK3-NEXT: store volatile double [[TMP36]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP37]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP38]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -619,100 +669,110 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr 
[[SVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -878,8 +938,7 @@ // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -889,27 +948,31 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -917,31 +980,38 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -951,111 +1021,123 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[SVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]], i64 [[TMP19]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK5-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP41]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP42]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done10: -// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP28]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK5-NEXT: store i32 [[TMP29]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP42]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done7: +// CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP43]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP44]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done9: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1064,36 +1146,46 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1103,99 +1195,99 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], 
align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP35]], ptr 
[[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done11: +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK5-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// 
CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: // CHECK5-NEXT: ret void // // @@ -1213,10 +1305,10 @@ // CHECK5-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1228,9 +1320,9 @@ // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK5-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1299,12 +1391,12 @@ // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK5: omp_offload.cont: // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: // 
CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1379,7 +1471,7 @@ // CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1388,23 +1480,27 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1412,136 +1508,154 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] 
= load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK5-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP24]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP25]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP37]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP39]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done11: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1550,134 +1664,142 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], 
align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 
[[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK5-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], 
label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP33]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP34]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr 
inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done15: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done12: // CHECK5-NEXT: ret void // // @@ -1697,7 +1819,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -1710,7 +1832,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void @@ -1885,8 +2007,7 @@ // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1896,27 +2017,31 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 
[[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1924,31 +2049,38 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = 
alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -1958,109 +2090,121 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], 
align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP16]], ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]], i32 [[TMP17]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP24]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP39]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP40]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done10: -// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP26]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP27]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done7: +// CHECK7-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP41]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP42:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP42]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done12: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done9: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2069,34 +2213,44 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2106,97 +2260,97 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i32 4, i1 false) // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK7-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done12: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done9: +// CHECK7-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done14: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done11: // CHECK7-NEXT: ret void // // @@ -2214,10 +2368,10 @@ // CHECK7-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK7-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2229,9 +2383,9 @@ // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK7-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK7-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2300,12 +2454,12 @@ // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK7: omp_offload.cont: // CHECK7-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2380,7 +2534,7 @@ // CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2389,23 +2543,27 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2413,134 +2571,152 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] 
= load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]]) +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP22]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP23]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done9: -// CHECK7-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP24]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done11: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done8: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2549,130 +2725,138 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// 
CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK7-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK7-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP33]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], 
i32 0, i32 0 +// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP34]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done11: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done8: +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done13: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done10: // CHECK7-NEXT: ret void // // @@ -2692,7 +2876,7 @@ // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: store i32 0, ptr [[F]], align 4 // CHECK7-NEXT: ret void // @@ -2705,7 +2889,7 @@ // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -65,77 +65,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -145,57 +161,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp @@ -340,29 +340,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -378,55 +385,63 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -436,12 +451,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -456,14 +472,20 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -479,68 +501,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -553,10 +575,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -564,9 +586,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -604,12 +626,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: 
store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -645,15 +667,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -661,81 +685,94 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -744,12 +781,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -759,97 +797,103 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr 
inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -873,7 +917,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -887,7 +931,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1080,29 +1124,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1118,53 +1169,61 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1174,12 +1233,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1194,14 +1254,20 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, 
align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1215,66 +1281,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: 
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1287,10 +1353,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1298,9 +1364,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1338,12 +1404,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: 
// CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1379,15 +1445,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1395,79 +1463,92 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1476,12 +1557,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1491,93 +1573,99 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], 
i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr 
[[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1601,7 +1689,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1615,7 +1703,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1774,15 +1862,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK5-SAME: () #[[ATTR5:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1790,68 +1880,82 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1864,74 +1968,80 @@ // CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: 
[[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 // CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // // @@ -1954,29 +2064,36 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1992,55 +2109,63 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -2060,12 +2185,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2080,14 
+2206,20 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2103,68 +2235,68 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // 
CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -2186,15 +2318,17 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK13-SAME: () #[[ATTR0]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2202,81 +2336,94 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label 
[[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2295,12 +2442,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2310,97 +2458,103 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP16]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2446,7 +2600,7 @@ // CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK13-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK13-NEXT: ret void @@ -2464,29 +2618,36 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2502,53 +2663,61 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// 
CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -2568,12 +2737,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2588,14 
+2758,20 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -2609,66 +2785,66 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // 
CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = 
add nsw i32 [[TMP20]], 1 // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -2690,15 +2866,17 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK15-SAME: () #[[ATTR0]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -2706,79 +2884,92 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label 
[[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2797,12 +2988,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -2812,93 +3004,99 @@ // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// 
CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2944,7 +3142,7 @@ // CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK15-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK15-NEXT: ret void @@ -2962,15 +3160,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2978,68 +3178,82 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3055,70 +3269,76 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK17-NEXT: store i32 
1, ptr [[G]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 // CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK17-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -130,78 +130,94 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -211,135 +227,157 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = 
add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l38 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -349,57 +387,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -447,78 +491,94 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l30 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -528,57 +588,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp @@ -149,191 +149,215 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], 
i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 
1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// 
CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -439,191 +463,215 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -735,187 +783,211 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], 
i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1021,187 +1093,211 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr 
[[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store 
i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1268,195 +1364,219 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // 
CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: 
call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// 
CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr 
@.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP25]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -52,259 +52,280 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP26:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 
[[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], 
[[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 -// 
CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: 
store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], 
align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP58]], i64 [[TMP59]], ptr [[ARGC1]], ptr [[TMP60]]) +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP69]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP61]], [[TMP62]] +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP70]], [[TMP71]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP64]]) -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP66]], i32 1) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP70]], ptr [[TMP69]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 -// CHECK1-NEXT: [[TMP73:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 
[[TMP72]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP73]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP73]]) +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP75]], i32 1) +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP82]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP74]], [[TMP75]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP76]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP83]], [[TMP84]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP77:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP77]] to i32 -// CHECK1-NEXT: [[TMP78:%.*]] = load i8, 
ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP78]] to i32 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] -// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 -// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP86]] to i32 +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i8 [[TMP87]] to i32 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV]], [[CONV15]] +// CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +// CHECK1-NEXT: store i8 [[CONV17]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP76]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP72]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP80:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP79]] monotonic, align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP81]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] -// CHECK1: omp.arraycpy.body23: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP82]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP88]] monotonic, align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY21:%.*]] = icmp eq ptr 
[[ARRAYIDX1]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY21]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY22:%.*]] +// CHECK1: omp.arraycpy.body22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST23:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST24:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP83:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP88:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP83]], ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[TMP85:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP85]] to i32 -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] -// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 -// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP87:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP83]], i8 [[TMP86]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP88]] = extractvalue { i8, i1 } [[TMP87]], 0 -// CHECK1-NEXT: [[TMP89:%.*]] = extractvalue { i8, i1 } [[TMP87]], 1 -// CHECK1-NEXT: br i1 [[TMP89]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP92:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY22]] ], [ [[TMP97:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP92]], ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[CONV27]], [[CONV28]] +// CHECK1-NEXT: [[CONV30:%.*]] = trunc i32 [[ADD29]] to i8 +// CHECK1-NEXT: store i8 [[CONV30]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i8 [[TMP92]], i8 [[TMP95]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP97]] = extractvalue { i8, i1 } [[TMP96]], 0 +// CHECK1-NEXT: [[TMP98:%.*]] = extractvalue { i8, i1 } [[TMP96]], 1 +// CHECK1-NEXT: br i1 [[TMP98]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP81]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] -// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i8, 
ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY22]] +// CHECK1: omp.arraycpy.done34: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP90:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP90]]) +// CHECK1-NEXT: [[TMP99:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP99]]) // CHECK1-NEXT: ret void // // @@ -392,283 +413,288 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.1], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = 
alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds 
[2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]] -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9 -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = add nuw i64 [[TMP42]], 1 -// CHECK1-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP44]], ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], 
ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP51]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP54]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP55]], 9 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: 
call void @llvm.memset.p0.i64(ptr align 8 [[TMP37]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP42]] +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP43]], i64 9 +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP44]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = add nuw i64 [[TMP49]], 1 +// CHECK1-NEXT: [[TMP51:%.*]] = mul nuw i64 [[TMP50]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP58]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP59]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP61]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// 
CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP62]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP56]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP63]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP57]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP58]], [[TMP59]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP60]], 1 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP67]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP66]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP68]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[TMP69]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP70]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP67]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP75]], ptr [[TMP67]]) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: [[TMP74:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP73]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP77]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP82]], ptr [[TMP74]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP77:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP77]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP79]]) -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP81]], i32 1) -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP85]], ptr [[TMP84]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP89]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP20]] to ptr +// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 +// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP89]], [[TMP90]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP93]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ 
[[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP94]] monotonic, align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP97]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP101]] monotonic, align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], 
[[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP104]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP98:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP103:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP98]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP102:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP98]], i8 [[TMP101]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP103]] = extractvalue { i8, i1 } [[TMP102]], 0 -// CHECK1-NEXT: [[TMP104:%.*]] = extractvalue { i8, i1 } [[TMP102]], 1 -// CHECK1-NEXT: br i1 [[TMP104]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 +// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 +// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP105]]) +// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP112]]) // CHECK1-NEXT: ret void // // @@ -807,20 +833,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -834,7 +860,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -453,83 +453,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // 
CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -539,64 +557,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -604,83 +628,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -690,64 +732,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // 
CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -755,83 +803,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -841,85 +907,91 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -927,83 +999,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1013,55 +1103,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: 
omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1076,83 +1172,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1162,55 +1276,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1487,81 +1607,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1571,61 +1709,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1633,81 +1777,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1717,61 +1879,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 
4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1779,81 +1947,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1863,80 +2049,86 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: 
omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1944,81 +2136,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: 
entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2028,52 +2238,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2088,81 +2304,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2172,52 +2406,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2494,83 +2734,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2580,64 +2838,70 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2645,83 +2909,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2731,64 +3013,70 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2796,83 +3084,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2882,85 +3188,91 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: 
br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2968,83 +3280,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3054,55 +3384,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 
-// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // 
CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3117,83 +3453,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3203,55 +3557,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] 
-// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3528,81 +3888,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3612,61 +3990,67 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3674,81 +4058,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3758,61 +4160,67 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 
4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3820,81 +4228,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3904,80 +4330,86 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: 
omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3985,81 +4417,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: 
entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4069,52 +4519,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4129,81 +4585,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4213,52 +4687,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
// CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4738,27 +5218,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4768,93 +5251,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] 
= icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4867,83 +5367,91 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4955,27 +5463,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4985,93 +5496,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw 
i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5084,83 +5612,91 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -5173,33 +5709,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5209,124 +5747,142 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// 
CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5339,84 +5895,94 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 
-// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -5428,27 +5994,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5458,93 +6027,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5557,72 +6143,80 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5642,33 +6236,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5678,99 +6274,117 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 6 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK13-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5783,74 +6397,84 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 
+// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6134,83 +6758,101 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6220,63 +6862,69 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// 
CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6284,83 +6932,101 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6370,63 +7036,69 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// 
CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6435,95 +7107,113 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6533,86 +7223,94 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -6620,83 +7318,101 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6706,54 +7422,60 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// 
CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6769,95 +7491,113 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6867,56 +7607,64 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, 
i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7400,27 +8148,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7430,91 +8181,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] 
= icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7527,80 +8295,88 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7612,27 +8388,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7642,91 +8421,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw 
i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7739,80 +8535,88 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7825,33 +8629,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7861,122 +8667,140 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// 
CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK15-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7989,81 +8813,91 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -8075,27 +8909,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8105,91 +8942,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8202,69 +9056,77 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// 
CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8284,33 +9146,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8320,97 +9184,115 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK15-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8423,71 +9305,81 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, 
ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8771,81 +9663,99 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8855,60 +9765,66 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8916,81 +9832,99 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9000,60 +9934,66 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -9062,93 +10002,111 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 
+// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9158,81 +10116,89 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -9240,81 +10206,99 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9324,51 +10308,57 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// 
CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9384,93 +10374,111 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 
+// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9480,53 +10488,61 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -10006,27 +11022,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
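The updated checks above and below all encode the same shape change: instead of forwarding every captured value as its own variadic argument of `__kmpc_fork_call`/`__kmpc_fork_teams` (arity 3-6 in the old checks), the caller now builds one anonymous aggregate (`%struct.anon*`, the `OMP_OUTLINED_ARG_AGG_` alloca) and passes a single pointer, which the outlined function unpacks through its `__context` parameter with `getelementptr`/`load` pairs. The sketch below is only an illustration of that pack/unpack discipline in plain C++; the struct layout, function names, and the `fork_call` stand-in are assumptions for exposition, not libomp's real entry points or Clang's generated code.

#include <cstdint>
#include <cstdio>

// Stand-in for the captured variables of one parallel region
// (mirrors the role of the anonymous %struct.anon in the checks).
struct CapturedContext {
  int32_t n;        // value capture (cf. the stored 'N')
  int64_t vla_size; // VLA extent (cf. the stored i64 [[TMP0]])
  int32_t *a;       // pointer capture of the array
};

// Stand-in for the outlined body: it receives one context pointer and
// unpacks the fields, like the generated loads from [[__CONTEXT]].
static void outlined_body(const CapturedContext *ctx) {
  for (int64_t i = 0; i < ctx->n && i < ctx->vla_size; ++i)
    ctx->a[i] = 0;
}

// Stand-in for the fork entry point: the caller now hands over a single
// aggregate pointer instead of a variadic list of captures (arity 1).
static void fork_call(void (*fn)(const CapturedContext *),
                      const CapturedContext *ctx) {
  fn(ctx); // a real runtime would run this on a team of threads
}

int main() {
  int32_t a[10] = {1, 2, 3};
  CapturedContext ctx{10, 10, a};  // pack the captures once, on the stack
  fork_call(outlined_body, &ctx);  // one pointer argument
  std::printf("%d\n", a[0]);       // prints 0
  return 0;
}
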
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10036,93 +11055,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] 
= icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10135,83 +11171,91 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10223,27 +11267,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10253,93 +11300,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10352,83 +11416,91 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10441,33 +11513,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10477,124 +11551,142 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// 
CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK17-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10607,84 +11699,94 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 
-// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10696,27 +11798,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10726,93 +11831,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10825,72 +11947,80 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -10910,33 +12040,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10946,99 +12078,117 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK17-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11051,74 +12201,84 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store 
i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] 
= trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11402,83 +12562,101 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11488,63 +12666,69 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// 
CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11552,83 +12736,101 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11638,63 +12840,69 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// 
CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11703,95 +12911,113 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11801,86 +13027,94 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK17-NEXT: ret void // // @@ -11888,83 +13122,101 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11974,54 +13226,60 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: 
omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12037,95 +13295,113 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12135,56 +13411,64 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 
[[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: 
omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12668,27 +13952,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12698,91 +13985,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] 
= icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12795,80 +14099,88 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12880,27 +14192,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12910,91 +14225,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13007,80 +14339,88 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -13093,33 +14433,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13129,122 +14471,140 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// 
CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK19-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13257,81 +14617,91 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP19:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -13343,27 +14713,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13373,91 +14746,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// 
CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13470,69 +14860,77 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group 
[[ACC_GRP16]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13552,33 +14950,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13588,97 +14988,115 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK19-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13691,71 +15109,81 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, 
ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14039,81 +15467,99 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14123,60 +15569,66 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -14184,81 +15636,99 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14268,60 +15738,66 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -14330,93 +15806,111 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 
+// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14426,81 +15920,89 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK19-NEXT: ret void // // @@ -14508,81 +16010,99 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14592,51 +16112,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP22]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14652,93 +16178,111 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 
+// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14748,53 +16292,61 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store 
i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
// CHECK19: omp.inner.for.end:
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -293,8 +293,7 @@
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]])
// CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8
// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
@@ -305,24 +304,26 @@
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4
// CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]])
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[I_CASTED]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i64 [[TMP5]], i64 [[TMP7]], ptr [[TMP1]])
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -332,93 +333,110 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// 
CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[I_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -427,15 +445,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -451,106 +469,116 @@ // CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = 
getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: @@ -565,27 +593,30 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -595,84 +626,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -685,15 +734,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -706,91 +754,99 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] 
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 
+// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -860,75 +916,93 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1023, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1023 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -937,13 +1011,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 
dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -953,63 +1027,69 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1023 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP6]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1235,8 +1315,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -1247,24 +1326,26 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i32 [[TMP5]], i32 [[TMP7]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1274,91 +1355,108 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP19]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1367,15 +1465,15 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1391,103 +1489,113 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = 
getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 [[TMP31]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP34]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK3-NEXT: store i32 [[ADD12]], ptr [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: @@ -1502,27 +1610,30 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1532,82 +1643,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1620,15 +1749,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1641,88 +1769,96 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -1792,73 +1928,91 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1023, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1023 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1867,13 +2021,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1883,61 +2037,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1023 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP6]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: 
omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2344,8 +2504,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) // CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -2356,24 +2515,26 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i64 [[TMP5]], i64 [[TMP7]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
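All of the regenerated CHECK lines in this file encode the same calling-convention change: instead of forwarding every captured value to __kmpc_fork_call / __kmpc_fork_teams as its own trailing argument (with the old argument counts of 3 or 5), the caller now packs the captures into one anonymous aggregate (the OMP_OUTLINED_ARG_AGG_ alloca of %struct.anon) and passes a single pointer with an argument count of 1; the outlined function then recovers each field through getelementptr and load. The C++ sketch below is only a rough picture of that convention under invented names (CapturedArgs, fork_call_stub, outlined_body); it is not the runtime API and not the code this patch emits.

#include <cstdio>

// Invented stand-ins: the real entry points (__kmpc_fork_call, __kmpc_fork_teams)
// take a source-location ident, an argument count, and the outlined routine, as
// the CHECK lines show; a plain function pointer is enough to illustrate the
// aggregate-passing scheme.
struct CapturedArgs {   // counterpart of %struct.anon in the checks
  int prev_lb;          // .captured.omp.previous.lb
  int prev_ub;          // .captured.omp.previous.ub
  int n;                // captured value of 'n'
  int *a;               // captured pointer to the array
};

// New convention: the outlined body receives one context pointer (argc == 1)
// and unpacks the fields itself, mirroring the getelementptr/load sequences
// asserted above.
static void outlined_body(void *context) {
  auto *args = static_cast<CapturedArgs *>(context);
  for (int i = args->prev_lb; i <= args->prev_ub && i < args->n; ++i)
    args->a[i] = 0;
}

// Minimal fork stub standing in for the runtime call; a real fork would run
// the microtask on a team of threads.
static void fork_call_stub(void (*microtask)(void *), void *aggregate) {
  microtask(aggregate);
}

int main() {
  int a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  CapturedArgs args{0, 3, 8, a};        // pack all captures into one aggregate
  fork_call_stub(outlined_body, &args); // pass a single pointer, not one
                                        // trailing argument per capture
  std::printf("%d %d %d %d\n", a[0], a[1], a[2], a[3]); // prints 0 0 0 0
  return 0;
}

Keeping the fork interface fixed-arity in this way is what lets the checks collapse the old run of i32/ptr trailing arguments into the single aggregate pointer, independent of how many values a region captures.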
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2383,93 +2544,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// 
CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[I_CASTED]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2478,15 +2656,15 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2502,106 +2680,116 @@ // CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = 
getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[I_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: @@ -2616,27 +2804,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2646,84 +2837,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]), !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2736,15 +2945,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2757,91 +2965,99 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 
-// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP17]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 
[[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -2857,75 +3073,93 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1023, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 
0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1023 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..5, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2934,13 +3168,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 
4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2950,63 +3184,69 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1023 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP6]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3022,8 +3262,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) // CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -3034,24 +3273,26 @@ // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i32 [[TMP5]], i32 [[TMP7]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3061,91 +3302,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// 
CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP19]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -3154,15 +3412,15 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// 
CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3178,103 +3436,113 @@ // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = 
getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: 
cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 [[TMP31]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: @@ -3289,27 +3557,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3319,82 +3590,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3407,15 +3696,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3428,88 +3716,96 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// 
CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -3525,73 +3821,91 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1023, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1023 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..5, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3600,13 +3914,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3616,61 +3930,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1023, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1023 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1023 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1023, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK11-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK11-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP6]], i32 1 monotonic, align 4, !llvm.access.group [[ACC_GRP24]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP24]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 1024, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -172,18 +172,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -191,58 +194,73 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -252,13 +270,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -270,79 +288,85 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP8]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -431,18 +455,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -450,56 +477,71 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -509,13 +551,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -527,75 +569,81 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -900,8 +948,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -910,145 +957,170 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], 
align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: 
omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], 
[[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[M]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, 
!llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK9-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP31]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK9-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP32]], 0 -// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK9-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK9-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1057,170 +1129,178 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store 
i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ 
[[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group 
[[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// 
CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 -// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1288,18 +1368,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1307,58 +1390,73 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1368,13 +1466,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 
dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1386,78 +1484,84 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store 
i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1621,8 +1725,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1631,147 +1734,172 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr 
[[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] 
= alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: 
[[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[M]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK11-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK11-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK11-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK11-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP49]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: 
.omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1780,170 +1908,178 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] 
= icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], 
[[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store 
i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 -// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 -// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] -// CHECK11-NEXT: store i32 [[ADD43]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK11-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 -// CHECK11-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 -// CHECK11-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] -// CHECK11-NEXT: store i32 [[ADD47]], ptr [[J14]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 +// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 +// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] +// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 +// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 +// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -2011,18 +2147,21 @@ // 
CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2030,56 +2169,71 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -2089,13 +2243,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2107,74 +2261,80 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 
// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -299,75 +299,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -376,13 +394,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -392,67 +410,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -464,75 +488,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -541,13 +583,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -557,67 +599,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -629,95 +677,113 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 
[[TMP23]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -726,13 +792,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, 
align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -742,67 +808,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -982,73 +1054,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1057,13 +1147,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1073,64 +1163,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1142,73 +1238,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1217,13 +1331,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1233,64 +1347,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1302,93 +1422,111 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1397,13 +1535,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1413,64 +1551,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// 
CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP25]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2021,27 +2165,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2051,84 +2198,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], 
align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] 
// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2141,15 +2306,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2162,89 +2326,97 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2262,27 
+2434,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2292,84 +2467,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// 
CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2382,15 +2575,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2403,89 +2595,97 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2504,33 
+2704,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2540,114 +2742,133 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], 
[[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], 
[[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK9-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK9-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 // CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -2660,16 +2881,15 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = 
alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2682,90 +2902,100 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 
[[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 +// 
CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -2943,75 +3173,93 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3020,13 +3268,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 
4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3036,66 +3284,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] 
-// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3107,75 +3361,93 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label 
[[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3184,13 +3456,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3200,66 +3472,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3272,107 +3550,125 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 
[[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, 
!llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3381,14 +3677,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3398,67 +3694,75 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3764,27 +4068,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 
4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3794,82 +4101,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3882,15 +4207,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3903,86 +4227,94 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// 
CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -4000,27 +4332,30 @@ // 
CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4030,82 +4365,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -4118,15 +4471,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4139,86 +4491,94 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -4237,33 +4597,35 @@ // CHECK11-NEXT: 
[[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4273,112 +4635,131 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]), !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], 
[[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK11-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -4391,16 +4772,15 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4413,87 +4793,97 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// 
CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -4671,73 +5061,91 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4746,13 +5154,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4762,63 +5170,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4830,73 +5244,91 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4905,13 +5337,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4921,63 +5353,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4990,105 +5428,123 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP29:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5097,14 +5553,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5114,64 +5570,72 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -406,8 +406,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 
// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -416,140 +415,165 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) 
[[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// 
CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -589,144 +613,152 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = 
load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false), !llvm.access.group 
[[ACC_GRP9]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// 
CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -734,10 +766,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -749,9 +781,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -820,12 +852,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -910,7 +942,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -919,23 +951,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -943,114 +979,134 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] 
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], 
[[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1070,16 +1126,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1087,127 +1141,135 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// 
CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// 
CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], 
ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1227,7 +1289,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1241,7 +1303,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1259,9 +1321,9 @@ // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: 
[[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1503,8 +1565,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1513,138 +1574,163 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) 
[[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 
[[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1684,140 +1770,148 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr 
[[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// 
CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group 
[[ACC_GRP10]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1825,10 +1919,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1840,9 +1934,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1911,12 +2005,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], 
i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2001,7 +2095,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2010,23 +2104,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2034,112 +2132,132 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: 
store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2159,16 +2277,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2176,123 +2292,131 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store 
i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr 
[[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], 
i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2312,7 +2436,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2326,7 +2450,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2344,9 +2468,9 @@ // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2510,35 +2634,33 @@ // CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca 
i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2546,72 +2668,87 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group 
[[ACC_GRP4]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2620,15 +2757,16 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2637,79 +2775,91 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: 
[[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// 
CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i32 1, ptr [[G]], align 4, 
!llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3515,8 +3665,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -3525,140 +3674,165 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], 
align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done3: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done1: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK13-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK13-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8, !llvm.access.group 
[[ACC_GRP6]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK13-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done8: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // @@ -3698,144 +3872,152 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: 
[[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 
[[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK13-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP32:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK13-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done13: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done10: // CHECK13-NEXT: ret void // // @@ -3857,7 +4039,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -3866,23 +4048,27 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3890,114 +4076,134 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull 
align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br 
label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] 
-// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done10: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // @@ -4017,16 +4223,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4034,127 +4238,135 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// 
CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done5: -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done3: +// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) 
[[TMP16]], ptr noundef [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: 
[[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK13-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done14: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done11: // CHECK13-NEXT: ret void // // @@ -4240,9 +4452,9 @@ // CHECK13-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK13-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -4259,8 +4471,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -4269,138 +4480,163 @@ // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP7]] +// 
CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK15-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK15-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done8: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // @@ -4440,140 +4676,148 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], 
align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) 
[[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 
[[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// 
CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done11: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done8: // CHECK15-NEXT: ret void // // @@ -4595,7 +4839,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -4604,23 +4848,27 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4628,112 +4876,132 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull 
align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br 
label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK15-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] 
-// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done10: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // @@ -4753,16 +5021,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4770,123 +5036,131 @@ // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// 
CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef 
nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK15-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// 
CHECK15-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done12: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done9: // CHECK15-NEXT: ret void // // @@ -4972,9 +5246,9 @@ // CHECK15-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK15-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK15-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -4990,35 +5264,33 @@ // CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], 
align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -5026,72 +5298,87 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 
// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, 
!llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5100,15 +5387,16 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -5120,76 +5408,88 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, 
!llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -203,81 +203,96 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp 
sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -286,13 +301,14 @@ // // // 
CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -302,62 +318,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: store i32 0, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] 
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -368,75 +392,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], 
ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -445,12 +484,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -460,43 +500,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP21]] @@ -504,17 +550,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -641,70 +687,85 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -713,12 +774,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -728,43 +790,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP27]] @@ -772,17 +840,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -793,75 +861,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) 
#[[ATTR2]], !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -870,12 +953,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -885,43 +969,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], 
ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP33]] @@ -929,17 +1019,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -951,93 +1041,111 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// 
CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1046,12 +1154,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1061,43 +1170,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -1105,17 +1220,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1231,70 +1346,85 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..13, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1303,12 +1433,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1318,43 +1449,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP45]] @@ -1362,17 +1499,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 
0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1383,75 +1520,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..14) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), 
!llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP48]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1460,12 +1612,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -1475,43 +1628,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP51]] @@ -1519,17 +1678,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1541,93 +1700,111 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], 
align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, 
!llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..17, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: call void @.omp_outlined..17(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void @.omp_outlined..17(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1636,12 +1813,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1651,43 +1829,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to 
i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -1695,17 +1879,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1815,81 +1999,96 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] 
], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1898,13 
+2097,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1914,62 +2114,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: store i32 0, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1980,75 +2188,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: 
[[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2057,12 +2280,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2072,43 +2296,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP22]] @@ -2116,17 +2346,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2253,70 +2483,85 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2325,12 +2570,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2340,43 +2586,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP28]] @@ -2384,17 +2636,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// 
CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2405,75 +2657,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2482,12 +2749,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2497,43 +2765,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 
+// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2541,17 +2815,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2563,147 +2837,172 @@ // CHECK3-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK3: omp_if.then4: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3: omp_if.then6: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP22]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK3: omp_if.else5: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK3: omp.inner.for.cond6: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK3: omp.inner.for.body8: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK3-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// 
CHECK3-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]] -// CHECK3: omp_if.then13: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK3-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK3: omp_if.else14: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_IF_END16]] -// CHECK3: omp_if.end16: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK3: omp.inner.for.inc17: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP37:![0-9]+]] -// CHECK3: omp.inner.for.end19: -// CHECK3-NEXT: br label [[OMP_IF_END20]] -// CHECK3: omp_if.end20: +// CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK3: omp_if.else7: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK3: omp.inner.for.cond8: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK3: omp.inner.for.body10: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK3-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK3-NEXT: store i64 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP36:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP35]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP37]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK3: 
omp_if.then15: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK3: omp_if.else16: +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP38]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: br label [[OMP_IF_END18]] +// CHECK3: omp_if.end18: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK3: omp.inner.for.inc19: +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK3: omp.inner.for.end21: +// CHECK3-NEXT: br label [[OMP_IF_END22]] +// CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK3-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2712,13 +3011,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2728,48 +3028,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP38]] @@ -2777,60 +3087,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: 
store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP41:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2839,13 +3149,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2855,48 +3166,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP42]] @@ -2904,60 +3225,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// 
CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP45:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: 
omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3073,70 +3394,85 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..14, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3145,12 +3481,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3160,43 +3497,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP49]] @@ -3204,17 +3547,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3225,75 +3568,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..15) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP52:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3302,12 +3660,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3317,43 +3676,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to 
i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3361,17 +3726,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3383,93 +3748,111 @@ // CHECK3-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, i64 [[TMP1]]) +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, 
!llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: call void @.omp_outlined..18(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void @.omp_outlined..18(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3478,12 +3861,13 @@ // // // CHECK3-LABEL: define 
{{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3493,43 +3877,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -3537,17 +3927,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// 
CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4250,81 +4640,96 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, 
!llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4333,13 +4738,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4349,62 +4755,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 
+// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: store i32 0, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4415,75 +4829,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) 
#[[ATTR2]], !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4492,12 +4921,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4507,43 +4937,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP21]] @@ -4551,17 +4987,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4688,70 +5124,85 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4760,12 +5211,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4775,43 +5227,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP27]] @@ -4819,17 +5277,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4840,75 +5298,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) 
#[[ATTR2]], !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4917,12 +5390,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4932,43 +5406,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP33]] @@ -4976,17 +5456,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4998,93 +5478,111 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// 
CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5093,12 +5591,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5108,43 +5607,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -5152,17 +5657,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5278,70 +5783,85 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..13, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5350,12 +5870,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5365,43 +5886,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP45]] @@ -5409,17 +5936,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 
0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5430,75 +5957,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..14) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), 
!llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP48]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5507,12 +6049,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -5522,43 +6065,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP51]] @@ -5566,17 +6115,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5588,93 +6137,111 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], 
align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, i64 [[TMP1]]) +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, 
!llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..17, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: call void @.omp_outlined..17(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void @.omp_outlined..17(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5683,12 +6250,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5698,43 +6266,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to 
i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -5742,17 +6316,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5862,81 +6436,96 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label 
[[DOTOMP_FINAL_DONE]] @@ -5945,13 +6534,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5961,62 +6551,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], 
align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store i32 0, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6027,75 +6625,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 
// CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 
[[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: 
// CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6104,12 +6717,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6119,43 +6733,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP22]] @@ -6163,17 +6783,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: 
omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6300,70 +6920,85 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 
99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6372,12 +7007,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6387,43 +7023,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP28]] @@ -6431,17 +7073,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// 
CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6452,75 +7094,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
+// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// 
CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6529,12 +7186,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6544,43 +7202,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, 
ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -6588,17 +7252,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6610,147 +7274,172 @@ // CHECK11-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] // CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK11: omp_if.then4: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK11-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK11-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11: omp_if.then6: +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP22]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK11: omp_if.else5: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK11: omp.inner.for.cond6: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK11: omp.inner.for.body8: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK11-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: 
[[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]] -// CHECK11: omp_if.then13: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK11-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK11: omp_if.else14: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: br label [[OMP_IF_END16]] -// CHECK11: omp_if.end16: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK11: omp.inner.for.inc17: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP37:![0-9]+]] -// CHECK11: omp.inner.for.end19: -// CHECK11-NEXT: br label [[OMP_IF_END20]] -// CHECK11: omp_if.end20: +// CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK11: omp_if.else7: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK11: omp.inner.for.cond8: +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK11: omp.inner.for.body10: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK11-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP35]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP37]] to i1 +// CHECK11-NEXT: 
br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK11: omp_if.then15: +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK11: omp_if.else16: +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP38]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: br label [[OMP_IF_END18]] +// CHECK11: omp_if.end18: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK11: omp.inner.for.inc19: +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK11: omp.inner.for.end21: +// CHECK11-NEXT: br label [[OMP_IF_END22]] +// CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK11-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6759,13 +7448,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6775,48 +7465,58 
@@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP38]] @@ -6824,60 +7524,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP41:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6886,13 +7586,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6902,48 +7603,58 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP42]] @@ -6951,60 +7662,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label 
[[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP45:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7120,70 +7831,85 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..14, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7192,12 +7918,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7207,43 +7934,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK11-NEXT: [[CMP2:%.*]] = 
icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP49]] @@ -7251,17 +7984,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7272,75 +8005,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..15) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr 
[[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7349,12 +8097,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7364,43 +8113,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// 
CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -7408,17 +8163,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7430,93 +8185,111 @@ // CHECK11-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, i64 [[TMP1]]) +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // 
CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group 
[[ACC_GRP54]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: call void @.omp_outlined..18(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void @.omp_outlined..18(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 
100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7525,12 +8298,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7540,43 +8314,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -7584,17 +8364,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -195,41 +195,39 @@ // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP1]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -237,117 +235,135 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP13]], ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store float [[TMP17]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP19]], ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = load volatile double, ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP27]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8 -// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP31]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP40]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load double, ptr [[TMP41]], align 8 +// CHECK1-NEXT: store volatile double [[TMP42]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP44]], ptr [[SFVAR]], align 4 // 
CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -361,101 +377,115 @@ // CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G1]], align 8 
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: 
[[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP26]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G3]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR6]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR7]], align 4 +// 
CHECK1-NEXT: store float [[TMP39]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -494,8 +524,7 @@ // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 @@ -503,26 +532,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -530,109 +562,127 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: 
[[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: 
omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float [[TMP15]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP10]], i32 [[TMP11]], ptr [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], ptr [[G2]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 4 -// CHECK3-NEXT: store volatile double [[TMP25]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP27]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP36]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load double, ptr [[TMP37]], align 4 +// CHECK3-NEXT: store volatile double [[TMP38]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP39]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP40]], ptr [[SFVAR]], align 4 // 
CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -640,107 +690,117 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: 
[[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP39]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -906,8 +966,7 @@ // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -917,27 +976,31 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -945,31 +1008,38 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -979,118 +1049,130 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, 
!llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]], i64 [[TMP19]]), !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK5-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK5-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP29]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP43]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] 
= getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP44]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done10: -// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP30]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK5-NEXT: store i32 [[TMP31]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP44]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done7: +// CHECK5-NEXT: [[TMP45:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP45]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP46:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP46]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done9: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1099,36 +1181,46 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x 
%struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1138,106 +1230,106 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP28]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP37]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done11: +// CHECK5-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK5-NEXT: store i32 [[TMP40]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: // CHECK5-NEXT: ret void // // @@ -1255,10 +1347,10 @@ // CHECK5-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 
+// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1270,9 +1362,9 @@ // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK5-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1341,12 +1433,12 @@ // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK5: omp_offload.cont: // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1421,7 +1513,7 @@ // CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1430,23 +1522,27 @@ // CHECK5-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1454,143 +1550,161 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: 
[[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) 
-// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group 
[[ACC_GRP14]] -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]]), !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK5-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK5-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP39]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: 
[[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP40]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP28]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP41]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// 
CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done11: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1599,141 +1713,149 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi 
ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] 
+// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK5-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done15: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// 
CHECK5: arraydestroy.done12: // CHECK5-NEXT: ret void // // @@ -1753,7 +1875,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -1766,7 +1888,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void @@ -1941,8 +2063,7 @@ // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1952,27 +2073,31 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1980,31 +2105,38 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2014,116 +2146,128 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] 
-// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[SVAR7]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: store i32 [[TMP16]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]], i32 [[TMP17]]), !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK7-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP26]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP41]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN6:%.*]] 
= getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP42]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done10: -// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP28]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP29]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP42]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done7: +// CHECK7-NEXT: [[TMP43:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP43]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP44:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP44]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done12: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done9: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2132,34 +2276,44 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds 
[[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2169,104 +2323,104 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP37]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done12: -// CHECK7-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP28]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done9: +// CHECK7-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP40]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done14: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done11: // CHECK7-NEXT: ret void // // @@ -2284,10 +2438,10 @@ // CHECK7-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK7-NEXT: 
[[VAR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2299,9 +2453,9 @@ // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK7-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK7-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2370,12 +2524,12 @@ // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK7: omp_offload.cont: // CHECK7-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2450,7 +2604,7 @@ // CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2459,23 +2613,27 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2483,141 +2641,159 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]]), !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP24]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP37]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: 
[[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done9: -// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP39]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// 
CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done11: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done8: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2626,137 +2802,145 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: 
call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK7-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK7-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done11: -// CHECK7-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done8: +// CHECK7-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done13: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done10: // CHECK7-NEXT: ret void // // @@ -2776,7 +2960,7 @@ // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, 
i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: store i32 0, ptr [[F]], align 4 // CHECK7-NEXT: ret void // @@ -2789,7 +2973,7 @@ // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp @@ -340,29 +340,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 @@ -378,62 +385,70 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -443,12 +458,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -463,14 +479,20 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -486,75 +508,75 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -567,10 +589,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -578,9 +600,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], 
align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -618,12 +640,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -659,15 +681,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -675,88 +699,101 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void 
@_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -765,12 +802,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -780,104 +818,110 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], 
[[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -901,7 +945,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -915,7 +959,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1108,29 +1152,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1146,60 +1197,68 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1209,12 +1268,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1229,14 +1289,20 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1250,73 +1316,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1329,10 +1395,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1340,9 +1406,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1380,12 +1446,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // 
CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1421,15 +1487,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1437,86 +1505,99 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void 
@_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1525,12 +1606,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1540,100 +1622,106 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 
4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1657,7 +1745,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1671,7 +1759,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1830,15 +1918,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK5-SAME: () #[[ATTR5:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1846,61 +1936,74 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1909,12 +2012,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1927,77 +2031,83 @@ // CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: 
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// 
CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2908,29 +3018,36 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2946,62 +3063,70 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -3021,12 +3146,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3041,14 +3167,20 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3064,75 +3196,75 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// 
CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: 
[[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK13-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// 
CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -3154,15 +3286,17 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK13-SAME: () #[[ATTR0]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3170,88 +3304,101 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], 
ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // 
CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3270,12 +3417,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3285,104 +3433,110 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 
[[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD7:%.*]] 
= add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK13-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3428,7 +3582,7 @@ // CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK13-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK13-NEXT: ret void @@ -3446,29 +3600,36 @@ // CHECK15-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3484,60 +3645,68 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -3557,12 +3726,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) 
#[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3577,14 +3747,20 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -3598,73 +3774,73 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: 
arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -3686,15 +3862,17 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK15-SAME: () #[[ATTR0]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -3702,86 +3880,99 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds 
[2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// 
CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK15-NEXT: 
br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3800,12 +3991,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -3815,100 +4007,106 @@ // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 
1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK15-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3954,7 +4152,7 @@ // CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK15-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK15-NEXT: ret void @@ -3972,15 +4170,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) 
+// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3988,61 +4188,74 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4051,12 +4264,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -4072,74 +4286,80 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK17-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -130,71 +130,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -203,12 +218,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -218,60 +234,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -282,71 +304,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l38 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -355,12 +392,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -370,60 +408,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -475,71 +519,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l30 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -548,12 +607,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -563,60 +623,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -149,205 +149,229 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 
[[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] 
= alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: 
cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -453,205 +477,229 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], 
i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -763,201 +811,225 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] 
= getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1063,201 +1135,225 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 
dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// 
CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] 
= icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: 
store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1324,209 +1420,233 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) 
[[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], 
ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 
[[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[SIVAR2:%.*]] 
= alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP27]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -453,75 +453,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -530,13 +548,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -546,67 +564,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -618,75 +642,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -695,13 +737,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -711,67 +753,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -783,75 +831,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 
8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -860,13 +926,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) 
#[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -876,88 +942,94 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 
4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -969,75 +1041,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1046,13 +1136,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef 
[[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1062,55 +1152,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 
35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1118,9 +1214,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1132,75 +1228,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1209,13 +1323,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1225,55 +1339,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: 
omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1281,9 +1401,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1557,73 +1677,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store 
ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1632,13 +1770,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1648,64 +1786,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1717,73 +1861,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1792,13 +1954,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1808,64 +1970,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1877,73 +2045,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1952,13 +2138,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1968,83 +2154,89 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], 
align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2056,73 +2248,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store 
ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2131,13 +2341,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2147,52 +2357,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// 
CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2200,9 +2416,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2214,73 +2430,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2289,13 +2523,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2305,52 +2539,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2358,9 +2598,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2634,75 +2874,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2711,13 +2969,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// 
CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2727,67 +2985,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2799,75 +3063,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2876,13 +3158,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2892,67 +3174,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2964,75 +3252,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3041,13 +3347,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) 
#[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3057,88 +3363,94 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 
4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3150,75 +3462,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3227,13 +3557,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef 
[[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3243,55 +3573,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 
1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3299,9 +3635,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3313,75 +3649,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3390,13 +3744,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3406,55 +3760,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3462,9 +3822,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3738,73 +4098,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // 
CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3813,13 +4191,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3829,64 +4207,70 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3898,73 +4282,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3973,13 +4375,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3989,64 +4391,70 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4058,73 +4466,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4133,13 +4559,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4149,83 +4575,89 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], 
align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4237,73 +4669,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store 
ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4312,13 +4762,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4328,52 +4778,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// 
CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP33]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4381,9 +4837,9 @@ // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK7-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4395,73 +4851,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4470,13 +4944,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4486,52 +4960,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4539,9 +5019,9 @@ // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK7-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5403,27 +5883,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], 
align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5433,84 +5916,102 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5523,15 +6024,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5544,89 +6044,97 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: 
[[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5644,27 +6152,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5674,84 +6185,102 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw 
i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] 
-// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5764,15 +6293,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5785,89 +6313,97 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: 
[[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5886,33 +6422,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5922,114 +6460,133 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]), !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], 
[[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) -// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK13-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) +// CHECK13-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK13-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK13-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 // CHECK13-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK13-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -6042,16 +6599,15 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6064,90 +6620,100 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 
[[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -6165,27 +6731,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6195,84 +6764,102 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP34]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6285,15 +6872,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6306,72 +6892,80 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6379,12 +6973,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -6403,33 +6997,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6439,89 +7035,108 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK13-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK13-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6534,16 +7149,15 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, 
align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6556,74 +7170,84 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label 
[[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6631,12 +7255,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -6919,75 +7543,93 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6996,13 +7638,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7012,66 +7654,72 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7083,75 +7731,93 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7160,13 +7826,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7176,66 +7842,72 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7248,86 +7920,104 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7336,14 +8026,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7353,89 +8043,97 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: 
store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, 
ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7447,75 +8145,93 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7524,13 +8240,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7540,54 +8256,60 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 
// CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7595,9 +8317,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7610,86 +8332,104 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7698,14 +8438,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7715,56 +8455,64 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: 
store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7772,9 +8520,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 
[[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8255,27 +9003,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8285,82 +9036,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP14:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8373,15 +9142,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8394,86 +9162,94 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// 
CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK15-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8491,27 +9267,30 @@ // 
CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8521,82 +9300,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8609,15 +9406,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8630,86 +9426,94 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK15-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8728,33 +9532,35 @@ // CHECK15-NEXT: 
[[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8764,112 +9570,131 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]), !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], 
[[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK15-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK15-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK15-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK15-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK15-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK15-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -8882,16 +9707,15 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8904,87 +9728,97 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK15-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// 
CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK15-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -9002,27 +9836,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9032,82 +9869,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP35]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -9120,15 +9975,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9141,69 +9995,77 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9211,12 +10073,12 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK15-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK15-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -9235,33 +10097,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: 
store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9271,87 +10135,106 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK15-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK15-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -9364,16 +10247,15 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, 
align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9386,71 +10268,81 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], 
align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 -// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9458,12 +10350,12 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK15-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // 
CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -9746,73 +10638,91 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9821,13 +10731,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 
noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9837,63 +10747,69 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 
1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[ADD2:%.*]] = add 
nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9905,73 +10821,91 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9980,13 +10914,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9996,63 +10930,69 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10065,84 +11005,102 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group 
[[ACC_GRP59]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10151,14 +11109,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10168,84 +11126,92 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: 
omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK15-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10257,73 +11223,91 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10332,13 +11316,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10348,51 +11332,57 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -10400,9 +11390,9 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK15-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10415,84 +11405,102 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 
[[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], 
align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group 
[[ACC_GRP71]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10501,14 +11509,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10518,53 +11526,61 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// 
CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -10572,9 +11588,9 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK15-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: 
[[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11051,27 +12067,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11081,84 +12100,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP13:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11171,15 +12208,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11192,89 +12228,97 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -11292,27 +12336,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11322,84 +12369,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP22]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11412,15 +12477,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11433,89 +12497,97 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: 
[[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -11534,33 +12606,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11570,114 +12644,133 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]), !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], 
[[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) -// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK17-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) +// CHECK17-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK17-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK17-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 // CHECK17-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -11690,16 +12783,15 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11712,90 +12804,100 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 
[[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK17-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -11813,27 +12915,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11843,84 +12948,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP34]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11933,15 +13056,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11954,72 +13076,80 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[CMP5:%.*]] 
= icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12027,12 +13157,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -12051,33 +13181,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 
-// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12087,89 +13219,108 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK17-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -12182,16 +13333,15 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca 
i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12204,74 +13354,84 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12279,12 +13439,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: 
-// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -12567,75 +13727,93 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12644,13 +13822,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12660,66 +13838,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], 
ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12731,75 +13915,93 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12808,13 +14010,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12824,66 +14026,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 
+// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12896,86 +14104,104 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12984,14 +14210,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13001,89 +14227,97 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK17-NEXT: [[TMP14:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13095,75 +14329,93 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13172,13 +14424,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13188,54 +14440,60 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -13243,9 +14501,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13258,86 +14516,104 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13346,14 +14622,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13363,56 +14639,64 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -13420,9 +14704,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13903,27 +15187,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13933,82 +15220,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP14:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14021,15 +15326,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14042,86 +15346,94 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// 
CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14139,27 +15451,30 @@ 
// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14169,82 +15484,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14257,15 +15590,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14278,86 +15610,94 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14376,33 +15716,35 @@ // 
CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14412,112 +15754,131 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] 
] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]), !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], 
[[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK19-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK19-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK19-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK19-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -14530,16 +15891,15 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14552,87 +15912,97 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// 
CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK19-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -14650,27 +16020,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14680,82 +16053,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP35]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14768,15 +16159,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14789,69 +16179,77 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14859,12 +16257,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -14883,33 +16281,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: 
store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14919,87 +16319,106 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK19-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -15012,16 +16431,15 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca 
i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -15034,71 +16452,81 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15106,12 +16534,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw 
i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -15394,73 +16822,91 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15469,13 +16915,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 
noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15485,63 +16931,69 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ADD2:%.*]] = 
add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15553,73 +17005,91 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15628,13 +17098,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15644,63 +17114,69 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15713,84 +17189,102 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group 
[[ACC_GRP59]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15799,14 +17293,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15816,84 +17310,92 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: 
omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15905,73 +17407,91 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15980,13 +17500,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15996,51 +17516,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -16048,9 +17574,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -16063,84 +17589,102 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // 
CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, 
!llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -16149,14 +17693,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16166,53 +17710,61 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 
+// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -16220,9 +17772,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: 
[[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp @@ -279,15 +279,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -302,6 +304,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -317,68 +321,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: 
omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -483,15 +487,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -507,6 +513,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -524,65 +532,65 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] 
= getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -814,15 +822,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -837,6 +847,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -852,66 +864,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1016,15 +1028,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1040,6 +1054,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1057,63 +1073,63 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = 
getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1311,15 +1327,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74 // CHECK9-SAME: () #[[ATTR5:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1335,63 +1353,65 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4 // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// 
CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp @@ -750,19 +750,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -773,74 +776,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // 
CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -870,19 +875,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -893,74 +901,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[SUB]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -990,19 +1000,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1012,91 +1025,93 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 1, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 1, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[MUL2]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 
+// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[MUL5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[MUL4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP17:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP22:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[_TMP6]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[_TMP6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[MUL7]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP17]], i32 [[TMP20]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP22]] = extractvalue { i32, i1 } [[TMP21]], 0 -// CHECK1-NEXT: [[TMP23:%.*]] = extractvalue { i32, i1 } [[TMP21]], 1 -// CHECK1-NEXT: br i1 [[TMP23]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP19:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP24:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[_TMP5]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[_TMP5]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[MUL6]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP19]], i32 [[TMP22]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP24]] = extractvalue { i32, i1 } [[TMP23]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = extractvalue { i32, i1 } [[TMP23]], 1 +// CHECK1-NEXT: br i1 [[TMP25]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1126,19 +1141,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[AND_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[AND_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AND_VAR1:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AND_VAR:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1148,115 +1166,117 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i8 1, ptr [[AND_VAR1]], align 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i8 1, ptr [[AND_VAR]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP12]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP11]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR1]], align 1 +// 
CHECK1-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR]], align 1 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[AND_VAR1]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[AND_VAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP15]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL5]], label [[LAND_RHS6:%.*]], label [[LAND_END8:%.*]] -// CHECK1: land.rhs6: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP16]] to i1 -// CHECK1-NEXT: br label [[LAND_END8]] -// CHECK1: land.end8: -// CHECK1-NEXT: [[TMP17:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS6]] ] -// CHECK1-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP17]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL4]], label [[LAND_RHS5:%.*]], label [[LAND_END7:%.*]] +// CHECK1: land.rhs5: +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br label [[LAND_END7]] +// CHECK1: land.end7: +// CHECK1-NEXT: [[TMP19:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LAND_RHS5]] ] +// CHECK1-NEXT: [[FROMBOOL8:%.*]] = zext i1 
[[TMP19]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP19:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[LAND_END17:%.*]] ] -// CHECK1-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP19]] to i1 -// CHECK1-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END17]] -// CHECK1: land.rhs15: -// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK1-NEXT: br label [[LAND_END17]] -// CHECK1: land.end17: -// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LAND_RHS15]] ] -// CHECK1-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP22]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP24:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP19]], i8 [[TMP23]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP25]] = extractvalue { i8, i1 } [[TMP24]], 0 -// CHECK1-NEXT: [[TMP26:%.*]] = extractvalue { i8, i1 } [[TMP24]], 1 -// CHECK1-NEXT: br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LAND_END16:%.*]] ] +// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK1-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END16]] +// CHECK1: land.rhs14: +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK1-NEXT: br label [[LAND_END16]] +// CHECK1: land.end16: +// CHECK1-NEXT: [[TMP24:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LAND_RHS14]] ] +// CHECK1-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP24]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 +// CHECK1-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 +// CHECK1-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // 
CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1293,19 +1313,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[OR_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[OR_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[OR_VAR1:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OR_VAR:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1315,115 +1338,117 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i8 0, ptr [[OR_VAR1]], align 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i8 0, ptr [[OR_VAR]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]] // CHECK1: lor.rhs: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP12]], 0 // CHECK1-NEXT: br label [[LOR_END]] // CHECK1: lor.end: -// CHECK1-NEXT: [[TMP11:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LOR_RHS]] ] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP11]] 
to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR1]], align 1 +// CHECK1-NEXT: [[TMP13:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LOR_RHS]] ] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR]], align 1 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[OR_VAR1]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[OR_VAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP15]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL5]], label [[LOR_END8:%.*]], label [[LOR_RHS6:%.*]] -// CHECK1: lor.rhs6: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP16]] to i1 -// CHECK1-NEXT: br label [[LOR_END8]] -// CHECK1: lor.end8: -// CHECK1-NEXT: [[TMP17:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LOR_RHS6]] ] -// CHECK1-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP17]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL4]], label [[LOR_END7:%.*]], label [[LOR_RHS5:%.*]] +// CHECK1: lor.rhs5: +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br label [[LOR_END7]] +// CHECK1: lor.end7: +// CHECK1-NEXT: [[TMP19:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], 
[[LOR_RHS5]] ] +// CHECK1-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP19]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP19:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[LOR_END17:%.*]] ] -// CHECK1-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP19]] to i1 -// CHECK1-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LOR_END17]], label [[LOR_RHS15:%.*]] -// CHECK1: lor.rhs15: -// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK1-NEXT: br label [[LOR_END17]] -// CHECK1: lor.end17: -// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LOR_RHS15]] ] -// CHECK1-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP22]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP24:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP19]], i8 [[TMP23]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP25]] = extractvalue { i8, i1 } [[TMP24]], 0 -// CHECK1-NEXT: [[TMP26:%.*]] = extractvalue { i8, i1 } [[TMP24]], 1 -// CHECK1-NEXT: br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LOR_END16:%.*]] ] +// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK1-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LOR_END16]], label [[LOR_RHS14:%.*]] +// CHECK1: lor.rhs14: +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK1-NEXT: br label [[LOR_END16]] +// CHECK1: lor.end16: +// CHECK1-NEXT: [[TMP24:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LOR_RHS14]] ] +// CHECK1-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP24]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 +// CHECK1-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 +// CHECK1-NEXT: br i1 [[TMP28]], label 
[[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1460,19 +1485,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1483,74 +1511,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 -1, ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 -1, ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[AND:%.*]] = and i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i32 [[AND]], ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[AND:%.*]] = and i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[AND]], ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[AND4:%.*]] = and i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[AND4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[AND3:%.*]] = and i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[AND3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw and ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1580,19 +1610,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1603,74 +1636,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i32 [[OR]], ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[OR]], ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) 
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[OR4:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[OR4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[OR3:%.*]] = or i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw or ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1700,19 +1735,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..25, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..25, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..25 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1723,74 +1761,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i32 [[XOR]], ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[XOR]], ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[XOR4:%.*]] = xor i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[XOR4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[XOR3:%.*]] = xor i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[XOR3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw xor ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw xor ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1820,19 +1860,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1843,92 +1886,94 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 -2147483648, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 -2147483648, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sge i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK1: cond.true4: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: br label [[COND_END6:%.*]] -// CHECK1: cond.false5: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: br label [[COND_END6]] -// CHECK1: cond.end6: -// CHECK1-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE4]] ], [ [[TMP12]], [[COND_FALSE5]] ] -// CHECK1-NEXT: store i32 [[COND7]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sge i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK1: cond.true3: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: br label [[COND_END5:%.*]] +// CHECK1: cond.false4: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: br label [[COND_END5]] +// CHECK1: cond.end5: +// CHECK1-NEXT: [[COND6:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE3]] ], [ [[TMP14]], [[COND_FALSE4]] ] +// CHECK1-NEXT: store i32 [[COND6]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label [[COND_END12:%.*]] -// CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: br label [[COND_END12]] -// CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE10]] ], [ [[TMP19]], [[COND_FALSE11]] ] -// CHECK1-NEXT: store i32 [[COND13]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK1: cond.true9: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: br label [[COND_END11:%.*]] +// CHECK1: cond.false10: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: br label [[COND_END11]] +// CHECK1: cond.end11: +// CHECK1-NEXT: [[COND12:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE9]] ], [ [[TMP21]], [[COND_FALSE10]] ] +// CHECK1-NEXT: store i32 [[COND12]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw max ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw max ptr [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr 
@[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1967,19 +2012,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..33, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..33, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..33 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1990,92 +2038,94 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 2147483647, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 2147483647, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// 
CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK1: cond.true4: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: br label [[COND_END6:%.*]] -// CHECK1: cond.false5: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: br label [[COND_END6]] -// CHECK1: cond.end6: -// CHECK1-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE4]] ], [ [[TMP12]], [[COND_FALSE5]] ] -// CHECK1-NEXT: store i32 [[COND7]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK1: cond.true3: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: br label [[COND_END5:%.*]] +// CHECK1: cond.false4: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: br 
label [[COND_END5]] +// CHECK1: cond.end5: +// CHECK1-NEXT: [[COND6:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE3]] ], [ [[TMP14]], [[COND_FALSE4]] ] +// CHECK1-NEXT: store i32 [[COND6]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.34, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.34, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label [[COND_END12:%.*]] -// CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: br label [[COND_END12]] -// CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE10]] ], [ [[TMP19]], [[COND_FALSE11]] ] -// CHECK1-NEXT: store i32 [[COND13]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK1: cond.true9: +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: br label [[COND_END11:%.*]] +// CHECK1: cond.false10: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// 
CHECK1-NEXT: br label [[COND_END11]] +// CHECK1: cond.end11: +// CHECK1-NEXT: [[COND12:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE9]] ], [ [[TMP21]], [[COND_FALSE10]] ] +// CHECK1-NEXT: store i32 [[COND12]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw min ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw min ptr [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -2647,19 +2697,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..37, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..37, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..37 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2670,74 +2723,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.38, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.38, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -2768,22 +2823,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..41, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..41, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..41 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2794,79 +2853,81 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[SUB]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[TMP14]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.42, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.42, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -2897,22 +2958,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..45, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..45, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..45 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2922,96 +2987,98 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32 1, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 1, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]] -// CHECK1-NEXT: store i32 [[MUL3]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], [[TMP15]] +// CHECK1-NEXT: store i32 [[MUL2]], ptr [[T_VAR]], align 4 // 
CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.46, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.46, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[MUL5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[MUL4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP19:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP24:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[_TMP6]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[_TMP6]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP20]], 
[[TMP21]] -// CHECK1-NEXT: store i32 [[MUL7]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP19]], i32 [[TMP22]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP24]] = extractvalue { i32, i1 } [[TMP23]], 0 -// CHECK1-NEXT: [[TMP25:%.*]] = extractvalue { i32, i1 } [[TMP23]], 1 -// CHECK1-NEXT: br i1 [[TMP25]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP22:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[_TMP5]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[_TMP5]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[MUL6]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP22]], i32 [[TMP25]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP27]] = extractvalue { i32, i1 } [[TMP26]], 0 +// CHECK1-NEXT: [[TMP28:%.*]] = extractvalue { i32, i1 } [[TMP26]], 1 +// CHECK1-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -3042,22 +3109,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..49, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..49, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..49 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[AND_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AND_VAR1:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AND_VAR:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3067,121 +3138,123 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i8 1, ptr [[AND_VAR1]], align 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i8 1, ptr [[AND_VAR]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[REM:%.*]] = srem i32 [[TMP12]], 2 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[REM]], 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[REM:%.*]] = srem i32 [[TMP15]], 2 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp eq i32 [[REM]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR1]], align 1 +// CHECK1-NEXT: 
[[TMP16:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP16]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR]], align 1 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[AND_VAR1]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.50, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[AND_VAR]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.50, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL5]], label [[LAND_RHS6:%.*]], label [[LAND_END8:%.*]] -// CHECK1: land.rhs6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK1-NEXT: br label [[LAND_END8]] -// CHECK1: land.end8: -// CHECK1-NEXT: [[TMP19:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS6]] ] -// CHECK1-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP19]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL4]], label [[LAND_RHS5:%.*]], label [[LAND_END7:%.*]] +// CHECK1: land.rhs5: +// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK1-NEXT: br label [[LAND_END7]] +// CHECK1: land.end7: +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LAND_RHS5]] ] +// CHECK1-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP22]] to i8 
+// CHECK1-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LAND_END17:%.*]] ] -// CHECK1-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK1-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END17]] -// CHECK1: land.rhs15: -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP23]] to i1 -// CHECK1-NEXT: br label [[LAND_END17]] -// CHECK1: land.end17: -// CHECK1-NEXT: [[TMP24:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LAND_RHS15]] ] -// CHECK1-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP24]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 -// CHECK1-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 -// CHECK1-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP24:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[LAND_END16:%.*]] ] +// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 +// CHECK1-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP25]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END16]] +// CHECK1: land.rhs14: +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK1-NEXT: br label [[LAND_END16]] +// CHECK1: land.end16: +// CHECK1-NEXT: [[TMP27:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LAND_RHS14]] ] +// CHECK1-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP27]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP29:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP24]], i8 [[TMP28]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP30]] = extractvalue { i8, i1 } [[TMP29]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = extractvalue { i8, i1 } [[TMP29]], 1 +// CHECK1-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: 
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -3219,22 +3292,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK1-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..53, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..53, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..53 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[OR_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[OR_VAR1:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OR_VAR:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3244,121 +3321,123 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i8 0, ptr [[OR_VAR1]], align 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i8 0, ptr [[OR_VAR]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// 
CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]] // CHECK1: lor.rhs: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[REM:%.*]] = srem i32 [[TMP12]], 2 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[REM]], 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[REM:%.*]] = srem i32 [[TMP15]], 2 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp eq i32 [[REM]], 0 // CHECK1-NEXT: br label [[LOR_END]] // CHECK1: lor.end: -// CHECK1-NEXT: [[TMP13:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LOR_RHS]] ] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR1]], align 1 +// CHECK1-NEXT: [[TMP16:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LOR_RHS]] ] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP16]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR]], align 1 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[OR_VAR1]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.54, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[OR_VAR]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.54, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: 
.omp.reduction.case1: -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL5]], label [[LOR_END8:%.*]], label [[LOR_RHS6:%.*]] -// CHECK1: lor.rhs6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK1-NEXT: br label [[LOR_END8]] -// CHECK1: lor.end8: -// CHECK1-NEXT: [[TMP19:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LOR_RHS6]] ] -// CHECK1-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP19]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL4]], label [[LOR_END7:%.*]], label [[LOR_RHS5:%.*]] +// CHECK1: lor.rhs5: +// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK1-NEXT: br label [[LOR_END7]] +// CHECK1: lor.end7: +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LOR_RHS5]] ] +// CHECK1-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP22]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LOR_END17:%.*]] ] -// CHECK1-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK1-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK1-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LOR_END17]], label [[LOR_RHS15:%.*]] -// CHECK1: lor.rhs15: -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK1-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP23]] to i1 -// CHECK1-NEXT: br label [[LOR_END17]] -// CHECK1: lor.end17: -// CHECK1-NEXT: [[TMP24:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LOR_RHS15]] ] -// CHECK1-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP24]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 -// CHECK1-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 -// CHECK1-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP24:%.*]] = phi i8 [ 
[[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[LOR_END16:%.*]] ] +// CHECK1-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 +// CHECK1-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK1-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP25]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LOR_END16]], label [[LOR_RHS14:%.*]] +// CHECK1: lor.rhs14: +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK1-NEXT: br label [[LOR_END16]] +// CHECK1: lor.end16: +// CHECK1-NEXT: [[TMP27:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LOR_RHS14]] ] +// CHECK1-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP27]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP29:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP24]], i8 [[TMP28]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP30]] = extractvalue { i8, i1 } [[TMP29]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = extractvalue { i8, i1 } [[TMP29]], 1 +// CHECK1-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -3396,22 +3475,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..57, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..57, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..57 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3422,79 +3505,81 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32 -1, ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 -1, ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[AND:%.*]] = and i32 [[TMP10]], [[TMP12]] -// CHECK1-NEXT: store i32 [[AND]], ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[AND:%.*]] = and i32 [[TMP13]], [[TMP15]] +// CHECK1-NEXT: store i32 [[AND]], ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.58, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.58, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[AND4:%.*]] = and i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[AND4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[AND3:%.*]] = and i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[AND3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw and ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -3525,22 +3610,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..61, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..61, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..61 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3551,79 +3640,81 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP10]], [[TMP12]] -// CHECK1-NEXT: store i32 [[OR]], ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP13]], [[TMP15]] +// CHECK1-NEXT: store i32 [[OR]], ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.62, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.62, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[OR4:%.*]] = or i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[OR4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[OR3:%.*]] = or i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw or ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw or ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -3654,22 +3745,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK1-NEXT: store ptr [[BIT_VAR]], ptr 
[[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..65, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..65, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..65 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3680,79 +3775,81 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[TMP10]], [[TMP12]] -// CHECK1-NEXT: store i32 [[XOR]], ptr [[BIT_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[TMP13]], [[TMP15]] 
+// CHECK1-NEXT: store i32 [[XOR]], ptr [[BIT_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.66, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.66, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[XOR4:%.*]] = xor i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[XOR4]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[XOR3:%.*]] = xor i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[XOR3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw xor ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -3783,22 
+3880,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..69, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..69, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..69 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3809,100 +3910,102 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32 -2147483648, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 -2147483648, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sge i32 [[TMP10]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK1: cond.true4: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: br label 
[[COND_END8:%.*]] -// CHECK1: cond.false5: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: br label [[COND_END8]] -// CHECK1: cond.end8: -// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE4]] ], [ [[TMP15]], [[COND_FALSE5]] ] -// CHECK1-NEXT: store i32 [[COND9]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sge i32 [[TMP13]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK1: cond.true3: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: br label [[COND_END7:%.*]] +// CHECK1: cond.false4: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: br label [[COND_END7]] +// CHECK1: cond.end7: +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE3]] ], [ [[TMP18]], [[COND_FALSE4]] ] +// CHECK1-NEXT: store i32 [[COND8]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.70, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.70, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label 
[[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK1: cond.true12: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label [[COND_END14:%.*]] -// CHECK1: cond.false13: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: br label [[COND_END14]] -// CHECK1: cond.end14: -// CHECK1-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE12]] ], [ [[TMP22]], [[COND_FALSE13]] ] -// CHECK1-NEXT: store i32 [[COND15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE11]] ], [ [[TMP25]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = atomicrmw max ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw max ptr [[TMP2]], i32 [[TMP26]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -3942,22 +4045,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..73, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..73, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..73 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3968,100 +4075,102 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK1: cond.true4: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: br label [[COND_END8:%.*]] -// CHECK1: cond.false5: +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: br label [[COND_END8]] -// CHECK1: cond.end8: -// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE4]] ], [ 
[[TMP15]], [[COND_FALSE5]] ] -// CHECK1-NEXT: store i32 [[COND9]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK1: cond.true3: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: br label [[COND_END7:%.*]] +// CHECK1: cond.false4: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: br label [[COND_END7]] +// CHECK1: cond.end7: +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE3]] ], [ [[TMP18]], [[COND_FALSE4]] ] +// CHECK1-NEXT: store i32 [[COND8]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.74, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.74, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK1: cond.true12: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: br label 
[[COND_END14:%.*]] -// CHECK1: cond.false13: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: br label [[COND_END14]] -// CHECK1: cond.end14: -// CHECK1-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE12]] ], [ [[TMP22]], [[COND_FALSE13]] ] -// CHECK1-NEXT: store i32 [[COND15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE11]] ], [ [[TMP25]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = atomicrmw min ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw min ptr [[TMP2]], i32 [[TMP26]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -4585,19 +4694,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4608,74 +4720,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // 
CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -4705,19 +4819,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4728,74 +4845,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[SUB]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[SUB]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -4825,19 +4944,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4847,91 +4969,93 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[MUL3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[MUL2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 
+// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[MUL5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[MUL4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP17:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP22:%.*]], [[ATOMIC_CONT]] ] -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[_TMP6]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[_TMP6]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[MUL7]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP17]], i32 [[TMP20]] monotonic monotonic, align 4 -// CHECK3-NEXT: [[TMP22]] = extractvalue { i32, i1 } [[TMP21]], 0 -// CHECK3-NEXT: [[TMP23:%.*]] = extractvalue { i32, i1 } [[TMP21]], 1 -// CHECK3-NEXT: br i1 [[TMP23]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP19:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP24:%.*]], [[ATOMIC_CONT]] ] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[_TMP5]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[_TMP5]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[MUL6]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP19]], i32 [[TMP22]] monotonic monotonic, align 4 +// CHECK3-NEXT: [[TMP24]] = extractvalue { i32, i1 } [[TMP23]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = extractvalue { i32, i1 } [[TMP23]], 1 +// CHECK3-NEXT: br i1 [[TMP25]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -4961,19 +5085,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[AND_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[AND_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AND_VAR1:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AND_VAR:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4983,115 +5110,117 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i8 1, ptr [[AND_VAR1]], align 1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i8 1, ptr [[AND_VAR]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK3: land.rhs: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP12]], 0 // CHECK3-NEXT: br label [[LAND_END]] // CHECK3: land.end: -// CHECK3-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LAND_RHS]] ] -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR1]], align 1 +// 
CHECK3-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LAND_RHS]] ] +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR]], align 1 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[AND_VAR1]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[AND_VAR]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP15]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[LAND_RHS6:%.*]], label [[LAND_END8:%.*]] -// CHECK3: land.rhs6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP16]] to i1 -// CHECK3-NEXT: br label [[LAND_END8]] -// CHECK3: land.end8: -// CHECK3-NEXT: [[TMP17:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS6]] ] -// CHECK3-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP17]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL4]], label [[LAND_RHS5:%.*]], label [[LAND_END7:%.*]] +// CHECK3: land.rhs5: +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: br label [[LAND_END7]] +// CHECK3: land.end7: +// CHECK3-NEXT: [[TMP19:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LAND_RHS5]] ] +// CHECK3-NEXT: [[FROMBOOL8:%.*]] = zext i1 
[[TMP19]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP19:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[LAND_END17:%.*]] ] -// CHECK3-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP19]] to i1 -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END17]] -// CHECK3: land.rhs15: -// CHECK3-NEXT: [[TMP21:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK3-NEXT: br label [[LAND_END17]] -// CHECK3: land.end17: -// CHECK3-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LAND_RHS15]] ] -// CHECK3-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP22]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP24:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP19]], i8 [[TMP23]] monotonic monotonic, align 1 -// CHECK3-NEXT: [[TMP25]] = extractvalue { i8, i1 } [[TMP24]], 0 -// CHECK3-NEXT: [[TMP26:%.*]] = extractvalue { i8, i1 } [[TMP24]], 1 -// CHECK3-NEXT: br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LAND_END16:%.*]] ] +// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK3-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END16]] +// CHECK3: land.rhs14: +// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK3-NEXT: br label [[LAND_END16]] +// CHECK3: land.end16: +// CHECK3-NEXT: [[TMP24:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LAND_RHS14]] ] +// CHECK3-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP24]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 +// CHECK3-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 +// CHECK3-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 +// CHECK3-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // 
CHECK3: atomic_exit: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -5128,19 +5257,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[OR_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[OR_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[OR_VAR1:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OR_VAR:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5150,115 +5282,117 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i8 0, ptr [[OR_VAR1]], align 1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i8 0, ptr [[OR_VAR]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]] // CHECK3: lor.rhs: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP12]], 0 // CHECK3-NEXT: br label [[LOR_END]] // CHECK3: lor.end: -// CHECK3-NEXT: [[TMP11:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LOR_RHS]] ] -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP11]] 
to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR1]], align 1 +// CHECK3-NEXT: [[TMP13:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LOR_RHS]] ] +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR]], align 1 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[OR_VAR1]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[OR_VAR]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP15]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[LOR_END8:%.*]], label [[LOR_RHS6:%.*]] -// CHECK3: lor.rhs6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP16]] to i1 -// CHECK3-NEXT: br label [[LOR_END8]] -// CHECK3: lor.end8: -// CHECK3-NEXT: [[TMP17:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LOR_RHS6]] ] -// CHECK3-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP17]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP17]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL4]], label [[LOR_END7:%.*]], label [[LOR_RHS5:%.*]] +// CHECK3: lor.rhs5: +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: br label [[LOR_END7]] +// CHECK3: lor.end7: +// CHECK3-NEXT: [[TMP19:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], 
[[LOR_RHS5]] ] +// CHECK3-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP19]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP19:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[LOR_END17:%.*]] ] -// CHECK3-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP19]] to i1 -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[LOR_END17]], label [[LOR_RHS15:%.*]] -// CHECK3: lor.rhs15: -// CHECK3-NEXT: [[TMP21:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK3-NEXT: br label [[LOR_END17]] -// CHECK3: lor.end17: -// CHECK3-NEXT: [[TMP22:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LOR_RHS15]] ] -// CHECK3-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP22]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP24:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP19]], i8 [[TMP23]] monotonic monotonic, align 1 -// CHECK3-NEXT: [[TMP25]] = extractvalue { i8, i1 } [[TMP24]], 0 -// CHECK3-NEXT: [[TMP26:%.*]] = extractvalue { i8, i1 } [[TMP24]], 1 -// CHECK3-NEXT: br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LOR_END16:%.*]] ] +// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK3-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP22]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL13]], label [[LOR_END16]], label [[LOR_RHS14:%.*]] +// CHECK3: lor.rhs14: +// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK3-NEXT: br label [[LOR_END16]] +// CHECK3: lor.end16: +// CHECK3-NEXT: [[TMP24:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LOR_RHS14]] ] +// CHECK3-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP24]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 +// CHECK3-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 +// CHECK3-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 +// CHECK3-NEXT: br i1 [[TMP28]], label 
[[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -5295,19 +5429,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5318,74 +5455,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 -1, ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 -1, ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[AND]], ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[AND]], ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[AND4:%.*]] = and i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[AND4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[AND3:%.*]] = and i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[AND3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw and ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -5415,19 +5554,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5438,74 +5580,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[OR]], ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[OR]], ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) 
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[OR4:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[OR4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[OR3:%.*]] = or i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw or ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -5535,19 +5679,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..25, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..25, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..25 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5558,74 +5705,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[XOR]], ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[XOR]], ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[XOR4:%.*]] = xor i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[XOR4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[XOR3:%.*]] = xor i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[XOR3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw xor ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw xor ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -5655,19 +5804,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5678,92 +5830,94 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 -2147483648, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 -2147483648, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 
1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sge i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK3: cond.true4: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END6:%.*]] -// CHECK3: cond.false5: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: br label [[COND_END6]] -// CHECK3: cond.end6: -// CHECK3-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE4]] ], [ [[TMP12]], [[COND_FALSE5]] ] -// CHECK3-NEXT: store i32 [[COND7]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sge i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK3: cond.true3: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: br label [[COND_END5:%.*]] +// CHECK3: cond.false4: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: br label [[COND_END5]] +// CHECK3: cond.end5: +// CHECK3-NEXT: [[COND6:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE3]] ], [ [[TMP14]], [[COND_FALSE4]] ] +// CHECK3-NEXT: store i32 [[COND6]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: br label [[COND_END12:%.*]] -// CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END12]] -// CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE10]] ], [ [[TMP19]], [[COND_FALSE11]] ] -// CHECK3-NEXT: store i32 [[COND13]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK3: cond.true9: +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: br label [[COND_END11:%.*]] +// CHECK3: cond.false10: +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: br label [[COND_END11]] +// CHECK3: cond.end11: +// CHECK3-NEXT: [[COND12:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE9]] ], [ [[TMP21]], [[COND_FALSE10]] ] +// CHECK3-NEXT: store i32 [[COND12]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw max ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw max ptr [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr 
@[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -5802,19 +5956,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..33, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..33, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..33 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5825,92 +5982,94 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 2147483647, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 2147483647, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// 
CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK3: cond.true4: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END6:%.*]] -// CHECK3: cond.false5: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: br label [[COND_END6]] -// CHECK3: cond.end6: -// CHECK3-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE4]] ], [ [[TMP12]], [[COND_FALSE5]] ] -// CHECK3-NEXT: store i32 [[COND7]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK3: cond.true3: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: br label [[COND_END5:%.*]] +// CHECK3: cond.false4: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: br 
label [[COND_END5]] +// CHECK3: cond.end5: +// CHECK3-NEXT: [[COND6:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE3]] ], [ [[TMP14]], [[COND_FALSE4]] ] +// CHECK3-NEXT: store i32 [[COND6]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.34, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.34, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: br label [[COND_END12:%.*]] -// CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END12]] -// CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE10]] ], [ [[TMP19]], [[COND_FALSE11]] ] -// CHECK3-NEXT: store i32 [[COND13]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK3: cond.true9: +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: br label [[COND_END11:%.*]] +// CHECK3: cond.false10: +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// 
CHECK3-NEXT: br label [[COND_END11]] +// CHECK3: cond.end11: +// CHECK3-NEXT: [[COND12:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE9]] ], [ [[TMP21]], [[COND_FALSE10]] ] +// CHECK3-NEXT: store i32 [[COND12]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw min ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw min ptr [[TMP2]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -6482,19 +6641,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..37, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..37, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..37 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6505,74 +6667,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.38, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.38, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -6603,22 +6767,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..41, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..41, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..41 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6629,78 +6797,80 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // 
CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[SUB]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], [[TMP14]] +// CHECK3-NEXT: store i32 [[SUB]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.42, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.42, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -6731,22 +6901,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..45, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..45, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..45 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6756,95 +6930,97 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]] -// CHECK3-NEXT: store i32 [[MUL3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], [[TMP15]] +// CHECK3-NEXT: store i32 [[MUL2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.46, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.46, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[MUL5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[MUL4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP19:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP24:%.*]], [[ATOMIC_CONT]] ] -// CHECK3-NEXT: store i32 [[TMP19]], ptr [[_TMP6]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[_TMP6]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[MUL7]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK3-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP19]], i32 [[TMP22]] monotonic monotonic, align 4 -// CHECK3-NEXT: [[TMP24]] = extractvalue { i32, i1 } [[TMP23]], 0 -// CHECK3-NEXT: [[TMP25:%.*]] = extractvalue { i32, i1 } [[TMP23]], 1 -// CHECK3-NEXT: br i1 [[TMP25]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP22:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[ATOMIC_CONT]] ] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[_TMP5]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[_TMP5]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[MUL6]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP22]], i32 [[TMP25]] monotonic monotonic, align 4 +// CHECK3-NEXT: [[TMP27]] = extractvalue { i32, i1 } [[TMP26]], 0 +// CHECK3-NEXT: [[TMP28:%.*]] = extractvalue { i32, i1 } [[TMP26]], 1 +// CHECK3-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -6875,22 +7051,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..49, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..49, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..49 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[AND_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AND_VAR1:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AND_VAR:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6900,120 +7080,122 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i8 1, ptr [[AND_VAR1]], align 1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i8 1, ptr [[AND_VAR]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK3: land.rhs: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[REM:%.*]] = srem i32 [[TMP12]], 2 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp eq i32 [[REM]], 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[REM:%.*]] = srem i32 [[TMP15]], 2 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp eq i32 [[REM]], 0 // CHECK3-NEXT: br label [[LAND_END]] // CHECK3: land.end: -// CHECK3-NEXT: [[TMP13:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LAND_RHS]] ] -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR1]], align 1 +// CHECK3-NEXT: [[TMP16:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LAND_RHS]] ] +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 
[[TMP16]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR]], align 1 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[AND_VAR1]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.50, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[AND_VAR]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.50, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[LAND_RHS6:%.*]], label [[LAND_END8:%.*]] -// CHECK3: land.rhs6: -// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK3-NEXT: br label [[LAND_END8]] -// CHECK3: land.end8: -// CHECK3-NEXT: [[TMP19:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS6]] ] -// CHECK3-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP19]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL4]], label [[LAND_RHS5:%.*]], label [[LAND_END7:%.*]] +// CHECK3: land.rhs5: +// CHECK3-NEXT: [[TMP21:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK3-NEXT: br label [[LAND_END7]] +// CHECK3: land.end7: +// CHECK3-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LAND_RHS5]] ] +// CHECK3-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP22]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 
[[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LAND_END17:%.*]] ] -// CHECK3-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END17]] -// CHECK3: land.rhs15: -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP23]] to i1 -// CHECK3-NEXT: br label [[LAND_END17]] -// CHECK3: land.end17: -// CHECK3-NEXT: [[TMP24:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LAND_RHS15]] ] -// CHECK3-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP24]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 -// CHECK3-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 -// CHECK3-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 -// CHECK3-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP24:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[LAND_END16:%.*]] ] +// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 +// CHECK3-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP25]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END16]] +// CHECK3: land.rhs14: +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK3-NEXT: br label [[LAND_END16]] +// CHECK3: land.end16: +// CHECK3-NEXT: [[TMP27:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LAND_RHS14]] ] +// CHECK3-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP27]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP29:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP24]], i8 [[TMP28]] monotonic monotonic, align 1 +// CHECK3-NEXT: [[TMP30]] = extractvalue { i8, i1 } [[TMP29]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = extractvalue { i8, i1 } [[TMP29]], 1 +// CHECK3-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// 
CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -7051,22 +7233,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK3-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..53, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..53, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..53 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[OR_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[OR_VAR1:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OR_VAR:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7076,120 +7262,122 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i8 0, ptr [[OR_VAR1]], align 1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i8 0, ptr [[OR_VAR]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// 
CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]] // CHECK3: lor.rhs: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[REM:%.*]] = srem i32 [[TMP12]], 2 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp eq i32 [[REM]], 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[REM:%.*]] = srem i32 [[TMP15]], 2 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp eq i32 [[REM]], 0 // CHECK3-NEXT: br label [[LOR_END]] // CHECK3: lor.end: -// CHECK3-NEXT: [[TMP13:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LOR_RHS]] ] -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP13]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR1]], align 1 +// CHECK3-NEXT: [[TMP16:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LOR_RHS]] ] +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP16]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR]], align 1 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[OR_VAR1]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.54, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[OR_VAR]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.54, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[LOR_END8:%.*]], label [[LOR_RHS6:%.*]] -// CHECK3: lor.rhs6: 
-// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP18]] to i1 -// CHECK3-NEXT: br label [[LOR_END8]] -// CHECK3: lor.end8: -// CHECK3-NEXT: [[TMP19:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LOR_RHS6]] ] -// CHECK3-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP19]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL4]], label [[LOR_END7:%.*]], label [[LOR_RHS5:%.*]] +// CHECK3: lor.rhs5: +// CHECK3-NEXT: [[TMP21:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK3-NEXT: br label [[LOR_END7]] +// CHECK3: lor.end7: +// CHECK3-NEXT: [[TMP22:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LOR_RHS5]] ] +// CHECK3-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP22]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK3-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: -// CHECK3-NEXT: [[TMP21:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP27:%.*]], [[LOR_END17:%.*]] ] -// CHECK3-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[LOR_END17]], label [[LOR_RHS15:%.*]] -// CHECK3: lor.rhs15: -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK3-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP23]] to i1 -// CHECK3-NEXT: br label [[LOR_END17]] -// CHECK3: lor.end17: -// CHECK3-NEXT: [[TMP24:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LOR_RHS15]] ] -// CHECK3-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP24]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK3-NEXT: [[TMP26:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP21]], i8 [[TMP25]] monotonic monotonic, align 1 -// CHECK3-NEXT: [[TMP27]] = extractvalue { i8, i1 } [[TMP26]], 0 -// CHECK3-NEXT: [[TMP28:%.*]] = extractvalue { i8, i1 } [[TMP26]], 1 -// CHECK3-NEXT: br i1 [[TMP28]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK3-NEXT: [[TMP24:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[LOR_END16:%.*]] ] +// CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 +// CHECK3-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL12]], 
ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK3-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP25]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL13]], label [[LOR_END16]], label [[LOR_RHS14:%.*]] +// CHECK3: lor.rhs14: +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK3-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK3-NEXT: br label [[LOR_END16]] +// CHECK3: lor.end16: +// CHECK3-NEXT: [[TMP27:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LOR_RHS14]] ] +// CHECK3-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP27]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK3-NEXT: [[TMP29:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP24]], i8 [[TMP28]] monotonic monotonic, align 1 +// CHECK3-NEXT: [[TMP30]] = extractvalue { i8, i1 } [[TMP29]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = extractvalue { i8, i1 } [[TMP29]], 1 +// CHECK3-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -7227,22 +7415,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..57, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..57, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..57 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7253,78 +7445,80 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 -1, ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 -1, ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP10]], [[TMP12]] -// CHECK3-NEXT: store i32 [[AND]], ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP13]], [[TMP15]] +// CHECK3-NEXT: store i32 [[AND]], ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds 
[1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.58, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.58, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[AND4:%.*]] = and i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[AND4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[AND3:%.*]] = and i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[AND3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw and ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -7355,22 +7549,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..61, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..61, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..61 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7381,78 +7579,80 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP10]], [[TMP12]] -// CHECK3-NEXT: store i32 [[OR]], ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP13]], [[TMP15]] +// CHECK3-NEXT: store i32 [[OR]], ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.62, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.62, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[OR4:%.*]] = or i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[OR4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[OR3:%.*]] = or i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw or ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw or ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -7483,22 +7683,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[BIT_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..65, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..65, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..65 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7509,78 +7713,80 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[TMP10]], [[TMP12]] -// CHECK3-NEXT: store i32 [[XOR]], ptr [[BIT_VAR1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[TMP13]], [[TMP15]] +// CHECK3-NEXT: store i32 [[XOR]], ptr [[BIT_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.66, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.66, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[XOR4:%.*]] = xor i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[XOR4]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[XOR3:%.*]] = xor i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[XOR3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw xor ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -7611,22 +7817,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // 
CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..69, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..69, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..69 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7637,98 +7847,100 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 -2147483648, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 -2147483648, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sge i32 [[TMP10]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK3: cond.true4: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false5: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP14]] -// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE4]] ], [ [[TMP15]], [[COND_FALSE5]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sge i32 [[TMP13]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK3: cond.true3: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: br label [[COND_END6:%.*]] +// CHECK3: cond.false4: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +// CHECK3-NEXT: br label [[COND_END6]] +// CHECK3: cond.end6: +// CHECK3-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE3]] ], [ [[TMP18]], [[COND_FALSE4]] ] +// CHECK3-NEXT: store i32 [[COND7]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.70, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.70, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK3: cond.true11: -// CHECK3-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: br label [[COND_END13:%.*]] -// CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END13]] -// CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE11]] ], [ [[TMP22]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK3: cond.true10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: br label [[COND_END12:%.*]] +// CHECK3: cond.false11: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: br label [[COND_END12]] +// CHECK3: cond.end12: +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE10]] ], [ [[TMP25]], [[COND_FALSE11]] ] +// CHECK3-NEXT: store i32 [[COND13]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw max ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw max ptr [[TMP2]], i32 [[TMP26]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -7768,22 +7980,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..73, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..73, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..73 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7794,98 +8010,100 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 2147483647, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK3: cond.true4: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false5: +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE4]] ], [ [[TMP15]], [[COND_FALSE5]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK3: cond.true3: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: br label [[COND_END6:%.*]] +// CHECK3: cond.false4: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], 
align 4 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +// CHECK3-NEXT: br label [[COND_END6]] +// CHECK3: cond.end6: +// CHECK3-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE3]] ], [ [[TMP18]], [[COND_FALSE4]] ] +// CHECK3-NEXT: store i32 [[COND7]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.74, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.74, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: br label [[COND_END13:%.*]] -// CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: br label [[COND_END13]] -// CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE11]] ], [ [[TMP22]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK3: 
cond.true10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: br label [[COND_END12:%.*]] +// CHECK3: cond.false11: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: br label [[COND_END12]] +// CHECK3: cond.end12: +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE10]] ], [ [[TMP25]], [[COND_FALSE11]] ] +// CHECK3-NEXT: store i32 [[COND13]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw min ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw min ptr [[TMP2]], i32 [[TMP26]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -7959,19 +8177,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7983,77 +8204,79 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_18]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_18]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] 
= getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -8083,19 +8306,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 // CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8103,81 +8329,83 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_19:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[SUB]], ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[SUB]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_1clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 
-// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -8207,19 +8435,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8227,97 +8458,99 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_22:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 1, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 1, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[MUL3]], ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[MUL2]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_22]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_2clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[MUL5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[MUL4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 // CHECK9-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK9: atomic_cont: -// CHECK9-NEXT: [[TMP18:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP23:%.*]], [[ATOMIC_CONT]] ] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[_TMP6]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[_TMP6]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[MUL7]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP18]], i32 [[TMP21]] monotonic monotonic, align 4 -// CHECK9-NEXT: [[TMP23]] = extractvalue { i32, i1 } [[TMP22]], 0 -// CHECK9-NEXT: [[TMP24:%.*]] = extractvalue { i32, i1 } [[TMP22]], 1 -// CHECK9-NEXT: br i1 [[TMP24]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK9-NEXT: [[TMP20:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP25:%.*]], [[ATOMIC_CONT]] ] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[_TMP5]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[_TMP5]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: store i32 [[MUL6]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP20]], i32 [[TMP23]] monotonic monotonic, align 4 +// CHECK9-NEXT: [[TMP25]] = extractvalue { i32, i1 } [[TMP24]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = extractvalue { i32, i1 } [[TMP24]], 1 +// CHECK9-NEXT: br i1 [[TMP26]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK9: atomic_exit: -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -8348,25 +8581,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 // CHECK9-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[AND_VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AND_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AND_VAR1:%.*]] = alloca i8, align 1 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AND_VAR:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8374,122 +8609,126 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_21:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_24:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK9-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[AND_VAR]], ptr [[AND_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AND_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i8 1, ptr [[AND_VAR1]], align 1 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store i8 1, ptr [[AND_VAR]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK9: land.rhs: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP14]], 0 // CHECK9-NEXT: br label [[LAND_END]] // CHECK9: land.end: -// CHECK9-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LAND_RHS]] ] -// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP11]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR1]], align 1 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_21]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LAND_RHS]] ] +// CHECK9-NEXT: [[FROMBOOL:%.*]] = 
zext i1 [[TMP15]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[AND_VAR]], align 1 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_24]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_3clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[AND_VAR1]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[AND_VAR]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK9-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL5]], label [[LAND_RHS6:%.*]], label [[LAND_END8:%.*]] -// CHECK9: land.rhs6: -// CHECK9-NEXT: [[TMP17:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK9-NEXT: br label [[LAND_END8]] -// CHECK9: land.end8: -// CHECK9-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS6]] ] -// CHECK9-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP18]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK9-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL4]], label [[LAND_RHS5:%.*]], label [[LAND_END7:%.*]] +// CHECK9: land.rhs5: +// CHECK9-NEXT: [[TMP21:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK9-NEXT: br label [[LAND_END7]] +// CHECK9: land.end7: +// 
CHECK9-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LAND_RHS5]] ] +// CHECK9-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP22]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP19]] to i1 -// CHECK9-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK9-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK9-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK9: atomic_cont: -// CHECK9-NEXT: [[TMP20:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP26:%.*]], [[LAND_END17:%.*]] ] -// CHECK9-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK9-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK9-NEXT: [[TMP21:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK9-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END17]] -// CHECK9: land.rhs15: -// CHECK9-NEXT: [[TMP22:%.*]] = load i8, ptr [[AND_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK9-NEXT: br label [[LAND_END17]] -// CHECK9: land.end17: -// CHECK9-NEXT: [[TMP23:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LAND_RHS15]] ] -// CHECK9-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP23]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK9-NEXT: [[TMP24:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK9-NEXT: [[TMP25:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP20]], i8 [[TMP24]] monotonic monotonic, align 1 -// CHECK9-NEXT: [[TMP26]] = extractvalue { i8, i1 } [[TMP25]], 0 -// CHECK9-NEXT: [[TMP27:%.*]] = extractvalue { i8, i1 } [[TMP25]], 1 -// CHECK9-NEXT: br i1 [[TMP27]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK9-NEXT: [[TMP24:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[LAND_END16:%.*]] ] +// CHECK9-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 +// CHECK9-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK9-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP25]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END16]] +// CHECK9: land.rhs14: +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[AND_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK9-NEXT: br label [[LAND_END16]] +// CHECK9: land.end16: +// CHECK9-NEXT: [[TMP27:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LAND_RHS14]] ] +// CHECK9-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP27]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK9-NEXT: [[TMP29:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP24]], i8 [[TMP28]] monotonic monotonic, align 1 +// CHECK9-NEXT: [[TMP30]] = extractvalue { i8, i1 } [[TMP29]], 0 +// 
CHECK9-NEXT: [[TMP31:%.*]] = extractvalue { i8, i1 } [[TMP29]], 1 +// CHECK9-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK9: atomic_exit: -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -8527,25 +8766,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 // CHECK9-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..13, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[OR_VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[OR_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[OR_VAR1:%.*]] = alloca i8, align 1 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OR_VAR:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8553,122 +8794,126 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_22:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_27:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[_TMP12:%.*]] = alloca i8, align 1 +// CHECK9-NEXT: [[_TMP11:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[OR_VAR]], ptr [[OR_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OR_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i8 0, ptr [[OR_VAR1]], align 1 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store i8 0, ptr [[OR_VAR]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[LOR_END:%.*]], label [[LOR_RHS:%.*]] // CHECK9: lor.rhs: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TMP14]], 0 // CHECK9-NEXT: br label [[LOR_END]] // CHECK9: lor.end: -// CHECK9-NEXT: [[TMP11:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP3]], [[LOR_RHS]] ] -// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP11]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR1]], align 1 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_22]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = phi i1 [ true, [[OMP_INNER_FOR_BODY]] ], [ [[CMP2]], [[LOR_RHS]] ] +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 
[[TMP15]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[OR_VAR]], align 1 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_27]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_4clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[OR_VAR1]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[OR_VAR]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK9-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL5]], label [[LOR_END8:%.*]], label [[LOR_RHS6:%.*]] -// CHECK9: lor.rhs6: -// CHECK9-NEXT: [[TMP17:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL7:%.*]] = trunc i8 [[TMP17]] to i1 -// CHECK9-NEXT: br label [[LOR_END8]] -// CHECK9: lor.end8: -// CHECK9-NEXT: [[TMP18:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LOR_RHS6]] ] -// CHECK9-NEXT: [[FROMBOOL9:%.*]] = zext i1 [[TMP18]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL9]], ptr [[TMP0]], align 1 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK9-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL4]], label [[LOR_END7:%.*]], label [[LOR_RHS5:%.*]] +// CHECK9: lor.rhs5: +// CHECK9-NEXT: [[TMP21:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK9-NEXT: br label [[LOR_END7]] +// CHECK9: lor.end7: +// CHECK9-NEXT: 
[[TMP22:%.*]] = phi i1 [ true, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL6]], [[LOR_RHS5]] ] +// CHECK9-NEXT: [[FROMBOOL8:%.*]] = zext i1 [[TMP22]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL8]], ptr [[TMP2]], align 1 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP19]] to i1 -// CHECK9-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP0]] monotonic, align 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP23]] to i1 +// CHECK9-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[TMP2]] monotonic, align 1 // CHECK9-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK9: atomic_cont: -// CHECK9-NEXT: [[TMP20:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP26:%.*]], [[LOR_END17:%.*]] ] -// CHECK9-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP20]] to i1 -// CHECK9-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL13]], ptr [[_TMP12]], align 1 -// CHECK9-NEXT: [[TMP21:%.*]] = load i8, ptr [[_TMP12]], align 1 -// CHECK9-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL14]], label [[LOR_END17]], label [[LOR_RHS15:%.*]] -// CHECK9: lor.rhs15: -// CHECK9-NEXT: [[TMP22:%.*]] = load i8, ptr [[OR_VAR1]], align 1 -// CHECK9-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK9-NEXT: br label [[LOR_END17]] -// CHECK9: lor.end17: -// CHECK9-NEXT: [[TMP23:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL16]], [[LOR_RHS15]] ] -// CHECK9-NEXT: [[FROMBOOL18:%.*]] = zext i1 [[TMP23]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL18]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK9-NEXT: [[TMP24:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK9-NEXT: [[TMP25:%.*]] = cmpxchg ptr [[TMP0]], i8 [[TMP20]], i8 [[TMP24]] monotonic monotonic, align 1 -// CHECK9-NEXT: [[TMP26]] = extractvalue { i8, i1 } [[TMP25]], 0 -// CHECK9-NEXT: [[TMP27:%.*]] = extractvalue { i8, i1 } [[TMP25]], 1 -// CHECK9-NEXT: br i1 [[TMP27]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK9-NEXT: [[TMP24:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP30:%.*]], [[LOR_END16:%.*]] ] +// CHECK9-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 +// CHECK9-NEXT: [[FROMBOOL12:%.*]] = zext i1 [[TOBOOL10]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL12]], ptr [[_TMP11]], align 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i8, ptr [[_TMP11]], align 1 +// CHECK9-NEXT: [[TOBOOL13:%.*]] = trunc i8 [[TMP25]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL13]], label [[LOR_END16]], label [[LOR_RHS14:%.*]] +// CHECK9: lor.rhs14: +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[OR_VAR]], align 1 +// CHECK9-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP26]] to i1 +// CHECK9-NEXT: br label [[LOR_END16]] +// CHECK9: lor.end16: +// CHECK9-NEXT: [[TMP27:%.*]] = phi i1 [ true, [[ATOMIC_CONT]] ], [ [[TOBOOL15]], [[LOR_RHS14]] ] +// CHECK9-NEXT: [[FROMBOOL17:%.*]] = zext i1 [[TMP27]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL17]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK9-NEXT: [[TMP29:%.*]] = cmpxchg ptr [[TMP2]], i8 [[TMP24]], i8 [[TMP28]] monotonic monotonic, align 1 +// CHECK9-NEXT: [[TMP30]] = extractvalue { i8, i1 } [[TMP29]], 0 +// CHECK9-NEXT: [[TMP31:%.*]] = 
extractvalue { i8, i1 } [[TMP29]], 1 +// CHECK9-NEXT: br i1 [[TMP31]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK9: atomic_exit: -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -8706,25 +8951,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 8 // CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..17, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8732,82 +8979,86 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_23:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_30:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 -1, ptr [[BIT_VAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 -1, ptr [[BIT_VAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], 
i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[AND:%.*]] = and i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: store i32 [[AND]], ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_23]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[AND:%.*]] = and i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: store i32 [[AND]], ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_30]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_5clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[AND4:%.*]] = and i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[AND4]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[AND3:%.*]] = and i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[AND3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw and ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw and ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -8838,25 +9089,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 8 // CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..21, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_31]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_31]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8864,82 +9117,86 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_24:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_33:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_31:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_31]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK9-NEXT: store 
i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[OR:%.*]] = or i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: store i32 [[OR]], ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_24]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[OR:%.*]] = or i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: store i32 [[OR]], ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_33]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_6clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[OR4:%.*]] = or i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[OR4]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[OR3:%.*]] = or i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw or ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw or ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -8970,25 +9227,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 8 // CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..25, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..25, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..25 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BIT_VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[BIT_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BIT_VAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[BIT_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8996,82 +9255,86 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_25:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_36:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 
8 -// CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BIT_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[BIT_VAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_34:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_34]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 0, ptr [[BIT_VAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[XOR:%.*]] = xor i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: store i32 [[XOR]], ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_25]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[XOR:%.*]] = xor i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: store i32 [[XOR]], ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_36]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_7clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[BIT_VAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[BIT_VAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[XOR4:%.*]] = xor i32 [[TMP15]], [[TMP16]] -// 
CHECK9-NEXT: store i32 [[XOR4]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[XOR3:%.*]] = xor i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[XOR3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[BIT_VAR1]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw xor ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[BIT_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -9101,19 +9364,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[MAX_VAR:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[MAX_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 8 // CHECK9-NEXT: store ptr [[MAX_VAR]], ptr [[MAX_VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[MAX_VAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_37]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[MAX_VAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[MAX_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[MAX_VAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[MAX_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9121,99 +9387,101 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_26:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_38:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[MAX_VAR]], ptr [[MAX_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[MAX_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 -2147483648, ptr [[MAX_VAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_37:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 -2147483648, ptr [[MAX_VAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: 
cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[MAX_VAR1]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sge i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK9: cond.true4: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[MAX_VAR1]], align 4 -// CHECK9-NEXT: br label [[COND_END6:%.*]] -// CHECK9: cond.false5: +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[MAX_VAR]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: br label [[COND_END6]] -// CHECK9: cond.end6: -// CHECK9-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE4]] ], [ [[TMP12]], [[COND_FALSE5]] ] -// CHECK9-NEXT: store i32 [[COND7]], ptr [[MAX_VAR1]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_26]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[MAX_VAR1]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sge i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK9: cond.true3: +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[MAX_VAR]], align 4 +// CHECK9-NEXT: br label [[COND_END5:%.*]] +// CHECK9: cond.false4: +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: br label [[COND_END5]] +// CHECK9: cond.end5: +// CHECK9-NEXT: [[COND6:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE3]] ], [ [[TMP14]], [[COND_FALSE4]] ] +// CHECK9-NEXT: store i32 [[COND6]], ptr [[MAX_VAR]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_38]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[MAX_VAR]], ptr [[TMP15]], align 8 // CHECK9-NEXT: call void 
@"_ZZZ4mainENK3$_8clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[MAX_VAR1]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[MAX_VAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[MAX_VAR1]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: br label [[COND_END12:%.*]] -// CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[MAX_VAR1]], align 4 -// CHECK9-NEXT: br label [[COND_END12]] -// CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE10]] ], [ [[TMP20]], [[COND_FALSE11]] ] -// CHECK9-NEXT: store i32 [[COND13]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[MAX_VAR]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK9: cond.true9: +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: br label [[COND_END11:%.*]] +// CHECK9: cond.false10: +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[MAX_VAR]], align 4 +// CHECK9-NEXT: br label [[COND_END11]] +// CHECK9: cond.end11: +// CHECK9-NEXT: [[COND12:%.*]] = phi 
i32 [ [[TMP21]], [[COND_TRUE9]] ], [ [[TMP22]], [[COND_FALSE10]] ] +// CHECK9-NEXT: store i32 [[COND12]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[MAX_VAR1]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw max ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[MAX_VAR]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw max ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -9252,19 +9520,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[MIN_VAR:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[MIN_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_39:%.*]], align 8 // CHECK9-NEXT: store ptr [[MIN_VAR]], ptr [[MIN_VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[MIN_VAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..33, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_39]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..33, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..33 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[MIN_VAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[MIN_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[MIN_VAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[MIN_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9272,99 +9543,101 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_27:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_40:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[MIN_VAR]], ptr [[MIN_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[MIN_VAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 2147483647, ptr [[MIN_VAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_39:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 2147483647, ptr [[MIN_VAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: 
cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[MIN_VAR1]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] -// CHECK9: cond.true4: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[MIN_VAR1]], align 4 -// CHECK9-NEXT: br label [[COND_END6:%.*]] -// CHECK9: cond.false5: +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[MIN_VAR]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: br label [[COND_END6]] -// CHECK9: cond.end6: -// CHECK9-NEXT: [[COND7:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE4]] ], [ [[TMP12]], [[COND_FALSE5]] ] -// CHECK9-NEXT: store i32 [[COND7]], ptr [[MIN_VAR1]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_27]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[MIN_VAR1]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[COND_TRUE3:%.*]], label [[COND_FALSE4:%.*]] +// CHECK9: cond.true3: +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[MIN_VAR]], align 4 +// CHECK9-NEXT: br label [[COND_END5:%.*]] +// CHECK9: cond.false4: +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: br label [[COND_END5]] +// CHECK9: cond.end5: +// CHECK9-NEXT: [[COND6:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE3]] ], [ [[TMP14]], [[COND_FALSE4]] ] +// CHECK9-NEXT: store i32 [[COND6]], ptr [[MIN_VAR]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_40]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[MIN_VAR]], ptr [[TMP15]], align 8 // CHECK9-NEXT: call void 
@"_ZZZ4mainENK3$_9clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[MIN_VAR1]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.34, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[MIN_VAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.34, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[MIN_VAR1]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: br label [[COND_END12:%.*]] -// CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[MIN_VAR1]], align 4 -// CHECK9-NEXT: br label [[COND_END12]] -// CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE10]] ], [ [[TMP20]], [[COND_FALSE11]] ] -// CHECK9-NEXT: store i32 [[COND13]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[MIN_VAR]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK9: cond.true9: +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: br label [[COND_END11:%.*]] +// CHECK9: cond.false10: +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[MIN_VAR]], align 4 +// CHECK9-NEXT: br label [[COND_END11]] +// CHECK9: cond.end11: +// CHECK9-NEXT: [[COND12:%.*]] = phi 
i32 [ [[TMP21]], [[COND_TRUE9]] ], [ [[TMP22]], [[COND_FALSE10]] ]
+// CHECK9-NEXT: store i32 [[COND12]], ptr [[TMP2]], align 4
+// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.case2:
-// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[MIN_VAR1]], align 4
-// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw min ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4
-// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[MIN_VAR]], align 4
+// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw min ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4
+// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.default:
// CHECK9-NEXT: ret void
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
--- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp
@@ -313,7 +313,7 @@
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
+// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8
@@ -383,13 +383,13 @@
// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8
// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0
// CHECK1-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2
// CHECK1-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4
// CHECK1-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1)
@@ -640,7 +640,7 @@
// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8
-//
CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -648,19 +648,20 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -670,53 +671,57 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: 
[[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -799,8 +804,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !26 @@ -851,21 +856,22 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -875,58 +881,62 @@ // CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[A1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 10, ptr [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void @@ -936,21 +946,22 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -960,58 +971,62 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1024,27 +1039,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1054,62 +1069,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1129,7 +1150,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1147,27 +1168,36 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1177,107 +1207,111 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: 
store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK1-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK1-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK1-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK1-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK1-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: 
[[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK1-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK1-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK1-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK1-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK1-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK1-NEXT: store i64 [[ADD15]], ptr [[X]], align 
8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1667,7 +1701,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1677,23 +1711,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1703,75 +1742,79 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 
[[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD2]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1787,42 +1830,41 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1836,111 +1878,121 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK1-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK1-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK1-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK1-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK1-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: @@ -1955,30 +2007,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1988,68 +2041,74 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// 
CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2084,7 +2143,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -2152,13 +2211,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: 
[[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2411,7 +2470,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2419,19 +2478,20 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2441,53 +2501,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2570,8 +2634,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 
[[TMP18]], 0 // CHECK3-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !27 @@ -2622,21 +2686,22 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2646,58 +2711,62 @@ // CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[A1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 10, ptr [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: ret void @@ -2707,21 +2776,22 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2731,58 +2801,62 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2795,27 +2869,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2825,62 +2899,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2900,7 +2980,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2918,27 +2998,36 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2948,107 +3037,111 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: 
store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK3-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK3-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: 
[[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK3-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK3-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK3-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK3-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK3-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK3-NEXT: store i64 [[ADD15]], ptr [[X]], align 
4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK3-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK3-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3438,7 +3531,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3448,23 +3541,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3474,75 +3572,79 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 
[[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3558,42 +3660,41 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3607,111 +3708,121 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK3-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK3-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK3-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK3-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK3-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK3-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK3-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK3-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK3-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: @@ -3726,30 +3837,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3759,68 +3871,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// 
CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3855,7 +3973,7 @@ // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8 @@ -3925,13 +4043,13 @@ // CHECK5-NEXT: store ptr null, ptr [[TMP23]], align 8 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: 
[[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK5-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2 // CHECK5-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 // CHECK5-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -4182,7 +4300,7 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4190,19 +4308,20 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK5-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4212,53 +4331,57 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK5-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4341,8 +4464,8 @@ // CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 
[[TMP18]], 0 // CHECK5-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !26 @@ -4393,21 +4516,22 @@ // CHECK5-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4417,58 +4541,62 @@ // CHECK5-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal !27 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !27 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !27 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal !27 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP4]]) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 10, ptr [[A]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void @@ -4478,21 +4606,22 @@ // CHECK5-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4502,58 +4631,62 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK5-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK5-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: 
omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4566,27 +4699,27 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4596,62 +4729,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// 
CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4671,7 +4810,7 @@ // CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4689,27 +4828,36 @@ // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK5-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4719,107 +4867,111 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: 
store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK5-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK5-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK5-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK5-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK5-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK5-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK5-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: 
[[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK5-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK5-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK5-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK5-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK5-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK5-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK5-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK5-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK5-NEXT: store i64 [[ADD15]], ptr [[X]], align 
8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK5-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK5-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK5-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK5-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5227,8 +5379,7 @@ // CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5239,29 +5390,34 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 1 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..12, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]], i64 [[TMP7]]) +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5271,118 +5427,126 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: 
store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK5-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK5-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK5-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD3]], ptr [[A]], align 8, 
!llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK5-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK5-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK5: omp.inner.for.cond9: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK5: omp.inner.for.body11: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK5-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK5-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD15]], ptr [[A16]], align 8 -// CHECK5-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 8 -// CHECK5-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC18]], ptr [[A17]], align 8 -// CHECK5-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK5-NEXT: [[TMP22:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP22]] -// CHECK5-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i64 1 -// CHECK5-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK5: omp.body.continue22: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK5: omp.inner.for.inc23: -// CHECK5-NEXT: [[TMP23:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK5: omp.inner.for.end25: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK5: omp.inner.for.cond8: +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK5: omp.inner.for.body10: +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK5-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// CHECK5-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK5-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD14]], ptr [[A15]], align 8 +// CHECK5-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 8 +// CHECK5-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC17]], ptr [[A16]], align 8 +// CHECK5-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK5-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP31]] +// CHECK5-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i64 1 +// CHECK5-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK5: omp.body.continue21: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK5: omp.inner.for.inc22: +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK5: omp.inner.for.end24: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5398,42 +5562,41 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: 
[[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK5-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK5-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5447,111 +5610,121 @@ // CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK5-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK5-NEXT: store 
i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK5-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK5-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK5-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK5-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK5-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], 
align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK5-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK5-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK5-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK5-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK5-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB18:%.*]] = sub i32 
[[TMP40]], [[TMP41]] // CHECK5-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK5-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK5-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK5-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK5-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK5-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK5-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: @@ -5566,30 +5739,31 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5599,68 +5773,74 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5695,7 +5875,7 @@ // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -5763,13 +5943,13 @@ // CHECK7-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK7-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK7-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK7-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK7-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -6022,7 +6202,7 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -6030,19 +6210,20 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK7-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6052,53 +6233,57 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK7-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6181,8 +6366,8 @@ // CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 -// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK7-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 
[[TMP18]], 0 // CHECK7-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !27 @@ -6233,21 +6418,22 @@ // CHECK7-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6257,58 +6443,62 @@ // CHECK7-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal !28 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !28 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !28 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal !28 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP4]]) +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 10, ptr [[A]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: ret void @@ -6318,21 +6508,22 @@ // CHECK7-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6342,58 +6533,62 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK7-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK7-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP31]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: 
omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6406,27 +6601,27 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6436,62 +6631,68 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// 
CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP34]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK7-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK7-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6511,7 +6712,7 @@ // CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -6529,27 +6730,36 @@ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK7-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK7-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6559,107 +6769,111 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK7-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: 
store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK7-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK7-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK7-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK7-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK7-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK7-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: 
[[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK7-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK7-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK7-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK7-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK7-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK7-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK7-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK7-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK7-NEXT: store i64 [[ADD15]], ptr [[X]], align 
4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK7-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK7-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK7-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK7-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK7-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7067,8 +7281,7 @@ // CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -7079,29 +7292,34 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 1 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..12, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]], i32 [[TMP7]]) +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7111,118 +7329,126 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: 
store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK7-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK7-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK7-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD3]], ptr [[A]], align 4, 
!llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK7-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK7-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP40]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK7: omp.inner.for.cond9: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK7-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK7: omp.inner.for.body11: -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK7-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK7-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD15]], ptr [[A16]], align 4 -// CHECK7-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 4 -// CHECK7-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC18]], ptr [[A17]], align 4 -// CHECK7-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK7-NEXT: [[TMP22:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP22]] -// CHECK7-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i32 1 -// CHECK7-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK7: omp.body.continue22: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK7: omp.inner.for.inc23: -// CHECK7-NEXT: [[TMP23:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP43:![0-9]+]] -// CHECK7: omp.inner.for.end25: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK7: omp.inner.for.cond8: +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK7-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK7: omp.inner.for.body10: +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK7-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// CHECK7-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK7-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK7-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD14]], ptr [[A15]], align 4 +// CHECK7-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 4 +// CHECK7-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC17]], ptr [[A16]], align 4 +// CHECK7-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK7-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP31]] +// CHECK7-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i32 1 +// CHECK7-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK7: omp.body.continue21: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK7: omp.inner.for.inc22: +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK7: omp.inner.for.end24: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7238,42 +7464,41 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: 
[[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK7-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK7-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7287,111 +7512,121 @@ // CHECK7-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK7-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK7-NEXT: store 
i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK7-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK7-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK7-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK7-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK7: omp.precond.then: // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK7-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK7-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK7-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK7-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], 
align 1, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK7-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK7-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK7-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK7-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK7-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK7-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[SUB18:%.*]] = sub i32 
[[TMP40]], [[TMP41]] // CHECK7-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK7-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK7-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK7-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK7-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK7-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK7-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: @@ -7406,30 +7641,31 @@ // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7439,68 +7675,74 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK7-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9653,7 +9895,7 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9661,19 +9903,20 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9683,53 +9926,57 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // 
CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9741,21 +9988,22 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9765,58 +10013,62 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: 
cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK17-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9829,27 +10081,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9859,62 +10111,68 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9934,7 +10192,7 @@ // CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -9952,27 +10210,36 @@ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9982,107 +10249,111 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 
8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK17-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK17-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK17-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK17-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK17-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK17-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK17-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CONV10:%.*]] = 
fpext float [[TMP18]] to double -// CHECK17-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK17-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK17-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK17-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK17-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK17-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK17-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK17-NEXT: store i64 
[[ADD15]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK17-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK17-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK17-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10098,42 +10369,41 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP5]], ptr 
[[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK17-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK17-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10147,111 +10417,121 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// 
CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK17-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: 
omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK17-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: 
omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK17-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK17-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK17-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK17-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK17-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK17-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK17-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK17-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK17-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK17-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK17-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK17-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK17-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK17-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK17-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK17-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: @@ -10268,7 +10548,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10278,23 +10558,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10304,75 +10589,79 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 
0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// 
CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK17-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: store double [[ADD2]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10386,30 +10675,31 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10419,68 +10709,74 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i16 [[CONV4]], 
ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10494,7 +10790,7 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -10502,19 +10798,20 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10524,53 +10821,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10582,21 +10883,22 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10606,58 +10908,62 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label 
[[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK19-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10670,27 +10976,27 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10700,62 +11006,68 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10775,7 +11087,7 @@ // CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -10793,27 +11105,36 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10823,107 +11144,111 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 
4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK19-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK19-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK19-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK19-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK19-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK19-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK19-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CONV10:%.*]] = 
fpext float [[TMP18]] to double -// CHECK19-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK19-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK19-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK19-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK19-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK19-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK19-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK19-NEXT: store i64 
[[ADD15]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK19-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK19-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK19-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK19-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK19-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10939,42 +11264,41 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP5]], ptr 
[[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK19-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK19-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10988,111 +11312,121 @@ // CHECK19-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// 
CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK19-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: 
omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK19-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK19-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK19-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: 
omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK19-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK19-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK19-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK19-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK19-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK19-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK19-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK19-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK19-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK19-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK19-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK19-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK19-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK19-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK19-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK19-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK19-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: @@ -11109,7 +11443,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -11119,23 +11453,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11145,75 +11484,79 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 
0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// 
CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK19-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: store double [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP31]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11227,30 +11570,31 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11260,68 +11604,74 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: store i16 [[CONV4]], 
ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11335,7 +11685,7 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -11343,19 +11693,20 @@ // CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK21-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11365,53 +11716,57 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store 
i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK21-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK21-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK21-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11423,21 +11778,22 @@ // CHECK21-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11447,58 +11803,62 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label 
[[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK21-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK21-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK21-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK21-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11511,27 +11871,27 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11541,62 +11901,68 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK21-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK21-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK21-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK21-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11616,7 +11982,7 @@ // CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -11634,27 +12000,36 @@ // CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK21-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK21-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11664,107 +12039,111 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 
8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK21-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK21-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK21-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK21-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK21-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK21-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK21-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK21-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK21-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK21-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CONV10:%.*]] = 
fpext float [[TMP18]] to double -// CHECK21-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK21-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK21-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK21-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK21-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK21-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK21-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK21-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK21-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK21-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK21-NEXT: store i64 
[[ADD15]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK21-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK21-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK21-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK21-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK21-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK21-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11780,42 +12159,41 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP5]], ptr 
[[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK21-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK21-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11829,111 +12207,121 @@ // CHECK21-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// 
CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK21-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK21-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK21-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK21-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK21-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK21-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK21-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK21: 
omp.precond.then: // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK21-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK21-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK21-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK21-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK21-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: 
omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK21-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK21-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK21-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK21-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK21-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK21-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK21-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK21-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK21-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK21-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK21-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK21-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK21-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK21-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK21-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK21-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK21-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK21-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK21-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK21-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK21-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK21-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK21-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK21-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: @@ -11951,8 +12339,7 @@ // CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK21-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -11963,29 +12350,34 @@ // CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: 
[[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]], i64 [[TMP7]]) +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11995,118 +12387,126 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK21-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK21-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[CMP2:%.*]] = 
icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK21-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK21-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD3]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK21-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK21-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// 
CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK21-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK21: omp.inner.for.cond9: -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK21-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK21: omp.inner.for.body11: -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK21-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK21-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK21-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD15]], ptr [[A16]], align 8 -// CHECK21-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 8 -// CHECK21-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC18]], ptr [[A17]], align 8 -// CHECK21-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK21-NEXT: [[TMP22:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP22]] -// CHECK21-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i64 1 -// CHECK21-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK21: omp.body.continue22: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK21: omp.inner.for.inc23: -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP33:![0-9]+]] -// CHECK21: omp.inner.for.end25: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK21: omp.inner.for.cond8: +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK21-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK21: omp.inner.for.body10: +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK21-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// CHECK21-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK21-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: 
store double [[ADD14]], ptr [[A15]], align 8 +// CHECK21-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 8 +// CHECK21-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC17]], ptr [[A16]], align 8 +// CHECK21-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK21-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP31]] +// CHECK21-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i64 1 +// CHECK21-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK21: omp.body.continue21: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK21: omp.inner.for.inc22: +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK21-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK21: omp.inner.for.end24: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK21-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK21-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12120,30 +12520,31 @@ // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12153,68 +12554,74 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group 
[[ACC_GRP35]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK21-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK21-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12228,7 +12635,7 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -12236,19 +12643,20 @@ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK23-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12258,53 +12666,57 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK23-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK23-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK23-NEXT: br i1 [[TMP13]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12316,21 +12728,22 @@ // CHECK23-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12340,58 +12753,62 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK23-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK23-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK23-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK23-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK23-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12404,27 +12821,27 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12434,62 +12851,68 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK23-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK23-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK23-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK23-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12509,7 +12932,7 @@ // CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -12527,27 +12950,36 @@ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK23-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK23-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12557,107 +12989,111 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 
4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK23-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK23-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK23-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK23-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK23-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK23-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK23-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK23-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK23-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CONV10:%.*]] = 
fpext float [[TMP18]] to double -// CHECK23-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK23-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK23-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK23-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK23-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK23-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK23-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK23-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK23-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK23-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK23-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK23-NEXT: store i64 
[[ADD15]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK23-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK23-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK23-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK23-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK23-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK23-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12673,42 +13109,41 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP5]], ptr 
[[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK23-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK23-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12722,111 +13157,121 @@ // CHECK23-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// 
CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK23-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK23-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK23-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK23-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK23-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK23-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK23-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK23: 
omp.precond.then: // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK23-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK23-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK23-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK23-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: 
omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK23-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK23-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK23-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK23-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK23-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK23-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK23-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK23-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK23-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK23-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK23-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK23-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK23-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK23-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK23-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK23-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK23-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK23-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK23-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK23-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK23-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: @@ -12844,8 +13289,7 @@ // CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK23-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -12856,29 +13300,34 @@ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: 
[[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]], i32 [[TMP7]]) +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12888,118 +13337,126 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK23-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[CMP2:%.*]] = 
icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK23-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK23-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK23-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK23-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP31]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// 
CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK23-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK23: omp.inner.for.cond9: -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK23-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK23: omp.inner.for.body11: -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK23-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK23-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD15]], ptr [[A16]], align 4 -// CHECK23-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 4 -// CHECK23-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC18]], ptr [[A17]], align 4 -// CHECK23-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK23-NEXT: [[TMP22:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP22]] -// CHECK23-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i32 1 -// CHECK23-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK23: omp.body.continue22: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK23: omp.inner.for.inc23: -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP34:![0-9]+]] -// CHECK23: omp.inner.for.end25: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK23: omp.inner.for.cond8: +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK23-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK23: omp.inner.for.body10: +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK23-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// CHECK23-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK23-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK23-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: 
store double [[ADD14]], ptr [[A15]], align 4 +// CHECK23-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 4 +// CHECK23-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC17]], ptr [[A16]], align 4 +// CHECK23-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK23-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP31]] +// CHECK23-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i32 1 +// CHECK23-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK23: omp.body.continue21: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK23: omp.inner.for.inc22: +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK23-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK23: omp.inner.for.end24: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK23-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK23-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13013,30 +13470,31 @@ // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13046,68 +13504,74 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group 
[[ACC_GRP36]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK23-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK23-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -167,18 +167,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -190,71 +193,73 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = 
getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -343,18 +348,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -366,69 +374,71 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: 
omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -733,8 +743,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -743,173 +752,183 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 
[[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: 
[[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 
[[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 -// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -977,18 +996,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1000,70 +1022,72 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: 
[[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // 
CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1227,8 +1251,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1237,171 +1260,181 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 
// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: 
store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 
4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 
[[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 -// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 -// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] -// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 -// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 -// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] -// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK11-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1 +// CHECK11-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 1 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK11-NEXT: store i32 [[ADD39]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// CHECK11-NEXT: store i32 [[ADD43]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1469,18 +1502,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1492,68 +1528,70 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ 
[[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, 
!llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -287,18 +287,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -308,59 +311,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: 
omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -372,18 +377,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -393,59 +401,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// 
CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -457,18 +467,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -478,76 +491,78 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -727,18 +742,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -748,58 +766,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: 
[[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -811,18 +831,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -832,58 +855,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -895,18 +920,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -916,75 +944,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1533,27 +1563,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1566,81 +1599,85 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1658,27 +1695,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1691,81 +1731,85 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // 
CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// 
CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1784,33 +1828,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr 
[[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1823,100 +1869,106 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // 
CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, 
ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK9-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK9-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -2079,18 +2131,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2100,58 +2155,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2163,18 +2220,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) 
#[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2184,58 +2244,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label 
[[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne 
i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2247,18 +2309,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2268,75 +2333,77 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] 
= load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2640,27 +2707,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2673,80 +2743,84 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2764,27 +2838,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2797,80 +2874,84 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2889,33 +2970,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store 
i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2928,99 +3011,105 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// 
CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -3183,18 +3272,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3204,57 +3296,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3266,18 +3360,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: 
entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3287,57 +3384,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // 
CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3349,18 +3448,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3370,74 +3472,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -347,8 +347,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // 
CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -357,145 +356,155 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: 
[[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -714,7 +723,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -723,23 +732,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr 
[[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -747,119 +760,123 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void 
@_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], 
ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1170,8 +1187,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1180,143 +1196,153 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1535,7 +1561,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1544,23 +1570,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] 
= load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1568,117 +1598,121 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr 
[[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef 
nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 
1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2551,35 +2585,33 @@ // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: 
[[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2591,68 +2623,76 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group 
[[ACC_GRP4]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -160,41 +160,39 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[G_ADDR]], align 8 -// 
CHECK1-NEXT: store double [[TMP0]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load volatile double, ptr [[TMP2]], align 8 -// CHECK1-NEXT: store double [[TMP3]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP7]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -211,91 +209,101 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// 
CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP24]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8 +// CHECK1-NEXT: store volatile double [[TMP31]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP33]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -334,8 +342,7 @@ // CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -343,26 +350,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -370,102 +380,108 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 
+// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load 
double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 4 +// CHECK3-NEXT: store volatile double [[TMP31]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP33]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -667,8 +683,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -678,27 +693,31 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -707,28 +726,34 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -738,106 +763,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 
[[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP22]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP32]] +// CHECK9-NEXT: br 
i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done10: +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[SVAR]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: 
call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done12: // CHECK9-NEXT: ret void // // @@ -1021,7 +1046,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1030,23 +1055,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1055,26 +1084,30 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1084,104 +1117,104 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: 
[[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1389,8 +1422,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1400,27 +1432,31 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1429,28 +1465,34 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 
+// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1460,104 +1502,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP22]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP32]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done9: +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[SVAR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done11: // CHECK11-NEXT: ret void // // @@ -1741,7 +1783,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1750,23 +1792,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1775,26 +1821,30 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1804,102 +1854,102 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD6:%.*]] = 
add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: 
// CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp @@ -279,15 +279,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -302,6 +304,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -317,75 +321,75 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -490,15 +494,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -514,6 +520,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -531,72 +539,72 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64
// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]]
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
-// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]])
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
-// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]])
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
+// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
// CHECK1-NEXT: store i32 2, ptr [[I]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]]
// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2
// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
// CHECK1: arraydestroy.body:
-// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]]
// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]]
@@ -828,15 +836,17 @@
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91
// CHECK3-SAME: () #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.)
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -851,6 +861,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -866,73 +878,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1037,15 +1049,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1061,6 +1075,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1078,70 +1094,70 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: 
omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -2091,15 +2107,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74 // CHECK9-SAME: () #[[ATTR5:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2115,66 +2133,68 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp 
sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK9: omp.inner.for.end:
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK9: omp.loop.exit:
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0
-// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
+// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK9: .omp.final.then:
// CHECK9-NEXT: store i32 2, ptr [[I]], align 4
// CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]]
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
--- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp
@@ -137,19 +137,22 @@
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]])
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -160,81 +163,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 
+// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -320,19 +325,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -343,81 +351,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -509,19 +519,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // 
CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -532,81 +545,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// 
CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr 
@[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -692,19 +707,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -715,81 +733,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: 
call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1034,19 +1054,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1058,84 +1081,86 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp --- a/clang/test/OpenMP/target_teams_map_codegen.cpp +++ b/clang/test/OpenMP/target_teams_map_codegen.cpp @@ -138,19 +138,23 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -218,33 +222,39 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[Y_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK1-NEXT: ret void // // @@ -312,60 +322,66 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: 
[[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -455,25 +471,30 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -533,25 +554,30 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -611,25 +637,30 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -761,85 +792,91 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..16, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4
// CHECK1-NEXT: [[X:%.*]] = alloca [77 x i32], align 4
-// CHECK1-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4
+// CHECK1-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false)
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false)
+// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK1: omp.arrayinit.done:
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
-// CHECK1: omp.arraycpy.done6:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK1: omp.arraycpy.done4:
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
-// CHECK1: omp.arraycpy.body8:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]]
-// CHECK1: omp.arraycpy.done14:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]]
+// CHECK1: omp.arraycpy.body6:
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]]
+// CHECK1: omp.arraycpy.done12:
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: ret void
@@ -881,85 +918,91 @@
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8
// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8
// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..20, ptr [[TMP0]], ptr [[TMP1]])
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4
+// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4
// CHECK1-NEXT: [[X:%.*]] = alloca [77 x i32], align 4
-// CHECK1-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4
+// CHECK1-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4
// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false)
-// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]]
+// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false)
+// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]]
// CHECK1: omp.arrayinit.done:
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var)
-// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK1-NEXT: ]
// CHECK1: .omp.reduction.case1:
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99
+// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
// CHECK1: omp.arraycpy.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1
+// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]]
-// CHECK1: omp.arraycpy.done6:
-// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]]
+// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK1: omp.arraycpy.done4:
+// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var)
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99
-// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]]
-// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]]
-// CHECK1: omp.arraycpy.body8:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1
-// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr
[[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK1: omp.arraycpy.body6: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1124,68 +1167,74 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..24, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK1-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK1-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK1-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) +// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret 
void @@ -1216,68 +1265,74 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..28, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..28, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..28 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK1-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK1-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK1-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) +// CHECK1-NEXT: [[TMP13:%.*]] = load i128, 
ptr [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1372,19 +1427,23 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1452,33 +1511,39 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y_CASTED]], align 4 
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -1546,60 +1611,66 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: 
[[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1689,25 +1760,30 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1767,25 +1843,30 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1845,25 +1926,30 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1995,85 +2081,91 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..16, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK3-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK3-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK3-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// 
CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done6: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done4: +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK3: omp.arraycpy.body8: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK3: omp.arraycpy.done14: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK3: omp.arraycpy.body6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK3-NEXT: 
[[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK3: omp.arraycpy.done12: +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2115,85 +2207,91 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..20, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK3-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK3-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK3-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK3-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done6: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done4: +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK3: omp.arraycpy.body8: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK3: omp.arraycpy.done14: -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK3: omp.arraycpy.body6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK3: omp.arraycpy.done12: +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2240,19 +2338,23 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: ret void // // @@ -2261,33 +2363,39 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[Y_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP5]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK5-NEXT: ret void // // @@ -2296,60 +2404,66 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK5-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[X]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK5-NEXT: store ptr [[Y]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: 
[[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2387,25 +2501,30 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2413,25 +2532,30 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2439,25 +2563,30 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2466,85 +2595,91 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK5-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK5-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK5-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// 
CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done6: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK5: omp.arraycpy.body8: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK5: omp.arraycpy.body6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK5-NEXT: 
[[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK5: omp.arraycpy.done12: +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2586,85 +2721,91 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..8, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK5-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK5-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK5-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK5-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done6: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK5: omp.arraycpy.body8: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK5: omp.arraycpy.body6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK5: omp.arraycpy.done12: +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2706,68 +2847,74 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..10, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK5-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK5-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK5-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6:[0-9]+]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6:[0-9]+]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2798,68 +2945,74 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..12, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK5-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr 
@[[GLOB2]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK5-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK5-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP10]], ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef 
[[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2888,19 +3041,23 @@ // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK7-SAME: () #[[ATTR0:[0-9]+]] { // CHECK7-NEXT: entry: -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: ret void // // @@ -2909,33 +3066,39 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK7-NEXT: ret void // // @@ -2944,60 +3107,66 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK7-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[Y2]], ptr [[TMP3]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK7-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[X]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[Y]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: 
[[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP11]] monotonic, align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP13]] monotonic, align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -3035,25 +3204,30 @@ // CHECK7-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -3061,25 +3235,30 @@ // CHECK7-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -3087,25 +3266,30 @@ // CHECK7-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -3114,85 +3298,91 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK7-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK7-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK7-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// 
CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done6: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done4: +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK7: omp.arraycpy.body8: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK7: omp.arraycpy.done14: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK7: omp.arraycpy.body6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK7-NEXT: 
[[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK7: omp.arraycpy.done12: +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -3234,85 +3424,91 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..8, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK7-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK7-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK7-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK7-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP5]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP7]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done6: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done4: +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP10]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK7: omp.arraycpy.body8: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP11]] monotonic, align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP10]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK7: omp.arraycpy.done14: -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK7: omp.arraycpy.body6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK7: omp.arraycpy.done12: +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp --- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -583,7 +583,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -591,29 +591,35 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -622,25 +628,30 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -649,21 +660,25 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -671,40 +686,48 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -714,8 +737,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -723,32 +745,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1168,7 +1197,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1176,29 +1205,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1207,25 +1242,30 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1234,21 +1274,25 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1256,40 +1300,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1299,8 +1351,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1308,32 +1359,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1348,21 +1406,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1370,21 +1432,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1394,7 +1460,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1402,29 +1468,35 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr 
[[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1433,25 +1505,30 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: call 
void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1459,19 +1536,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1481,8 +1562,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1490,32 +1570,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1523,21 +1610,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1545,21 +1636,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1569,7 +1664,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1577,29 +1672,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 
-// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1608,25 +1709,30 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 
// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1634,19 +1740,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1656,8 +1766,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1665,31 +1774,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -599,7 +599,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -607,29 +607,35 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -638,25 +644,30 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -666,23 +677,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -690,40 +705,48 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -733,8 +756,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -742,32 +764,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1203,7 +1232,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1211,29 +1240,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1242,25 +1277,30 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1270,23 +1310,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1294,40 +1338,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1337,8 +1389,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1346,32 +1397,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1387,23 +1445,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1411,21 +1473,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1435,7 +1501,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1443,29 +1509,35 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1474,25 +1546,30 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1500,19 +1577,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1522,8 +1603,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1531,32 +1611,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1565,23 +1652,27 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1589,21 +1680,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1613,7 +1708,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1621,29 +1716,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1652,25 +1753,30 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1678,19 +1784,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1700,8 +1810,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1709,31 +1818,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -1226,44 +1226,49 @@ // CHECK1-SAME: () #[[ATTR8]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[A]]) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP6]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[B]], align 8 -// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 
[[TMP2]], ptr [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP8]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[B]], align 8 +// CHECK1-NEXT: store double [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -1275,7 +1280,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK1-NEXT: ret void @@ -1297,11 +1302,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) @@ -2346,44 +2351,49 @@ // CHECK2-SAME: () 
#[[ATTR8]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[A]]) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK2-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[B]], align 8 -// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP6]]) -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load double, ptr [[B]], align 8 +// CHECK2-NEXT: store double [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // @@ -2395,7 +2405,7 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-NEXT: ret void @@ -2417,11 +2427,11 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) @@ -3548,44 +3558,49 @@ // CHECK2-51-SAME: () #[[ATTR8]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK2-51-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[A]]) +// CHECK2-51-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 +// CHECK2-51-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-51-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK2-51-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK2-51-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK2-51-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-51-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-51-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-51-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 +// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK2-51-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-51-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-51-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-51-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK2-51-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-51-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-51-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-51-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-51-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2-51: omp_if.then: -// 
CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK2-51-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..29) -// CHECK2-51-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK2-51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK2-51-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load double, ptr [[B]], align 8 -// CHECK2-51-NEXT: store double [[TMP12]], ptr [[TMP11]], align 8 -// CHECK2-51-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP6]]) -// CHECK2-51-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-51-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-51-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK2-51-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..29) +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK2-51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK2-51-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_28:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP14:%.*]] = load double, ptr [[B]], align 8 +// CHECK2-51-NEXT: store double [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-51-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]]) +// CHECK2-51-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-51-NEXT: br label [[OMP_IF_END]] // CHECK2-51: omp_if.end: -// CHECK2-51-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-51-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK2-51-NEXT: ret void // // @@ -3597,7 +3612,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_28:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3619,11 +3634,11 @@ // 
CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META136:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) @@ -3657,26 +3672,26 @@ // CHECK2-51-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[E:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_29:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_32:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR2:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR3:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_35:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_38:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED10:%.*]] = alloca [[STRUCT_ANON_40:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED10:%.*]] = alloca [[STRUCT_ANON_41:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR11:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR12:%.*]] = alloca i64, align 8 // CHECK2-51-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-51-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..32) -// CHECK2-51-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], ptr [[TMP1]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], ptr 
[[TMP1]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30]], ptr [[TMP1]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_31:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 @@ -3698,9 +3713,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR]], align 8 // CHECK2-51-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]], i32 2, ptr [[TMP6]], i32 0, ptr null) // CHECK2-51-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..35) -// CHECK2-51-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], ptr [[TMP17]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], ptr [[TMP17]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33:%.*]], ptr [[TMP17]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33]], ptr [[TMP17]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_34:%.*]], ptr [[TMP19]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP21:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 // CHECK2-51-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR2]], i64 0, i64 0 @@ -3722,9 +3737,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR3]], align 8 // CHECK2-51-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP17]], i32 2, ptr [[TMP22]], i32 0, ptr null) // CHECK2-51-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..38) -// CHECK2-51-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], ptr [[TMP33]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], ptr [[TMP33]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], ptr [[TMP35]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36:%.*]], ptr [[TMP33]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36]], ptr [[TMP33]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_37:%.*]], ptr [[TMP35]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP37:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP37]], ptr [[TMP36]], align 8 // CHECK2-51-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr 
[[DOTDEP_ARR_ADDR5]], i64 0, i64 0 @@ -3746,9 +3761,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR6]], align 8 // CHECK2-51-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP33]], i32 2, ptr [[TMP38]], i32 0, ptr null) // CHECK2-51-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..41) -// CHECK2-51-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], ptr [[TMP49]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], ptr [[TMP49]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], ptr [[TMP51]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39:%.*]], ptr [[TMP49]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39]], ptr [[TMP49]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_40:%.*]], ptr [[TMP51]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP53:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP53]], ptr [[TMP52]], align 8 // CHECK2-51-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR8]], i64 0, i64 0 @@ -3770,9 +3785,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR9]], align 8 // CHECK2-51-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP49]], i32 2, ptr [[TMP54]], i32 0, ptr null) // CHECK2-51-NEXT: [[TMP65:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..44) -// CHECK2-51-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], ptr [[TMP65]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], ptr [[TMP65]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], ptr [[TMP67]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42:%.*]], ptr [[TMP65]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42]], ptr [[TMP65]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_43:%.*]], ptr [[TMP67]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP69:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP69]], ptr [[TMP68]], align 8 // CHECK2-51-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 @@ -3796,7 +3811,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_31:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3818,11 +3833,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // 
CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META145:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META150:![0-9]+]]) @@ -3850,7 +3865,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_34:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3872,11 +3887,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META155:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META158:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META160:![0-9]+]]) @@ -3904,7 +3919,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT__KMP_PRIVATES_T_37:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3926,11 +3941,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META170:![0-9]+]]) @@ -3958,7 +3973,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_40:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3980,11 +3995,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META175:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META178:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META180:![0-9]+]]) @@ -4012,7 +4027,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // 
CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_43:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -4034,11 +4049,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META185:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META188:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META190:![0-9]+]]) diff --git a/clang/test/OpenMP/task_if_codegen.cpp b/clang/test/OpenMP/task_if_codegen.cpp --- a/clang/test/OpenMP/task_if_codegen.cpp +++ b/clang/test/OpenMP/task_if_codegen.cpp @@ -90,25 +90,29 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) 
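The renumbered temporaries in the surrounding checks are a mechanical consequence of the extra load of the new __CONTEXT parameter at the top of each outlined body; the emitted logic is otherwise unchanged. The convention these updated expectations encode can be sketched in plain C++ roughly as follows (a hand-written illustration, not code from this patch; anon_empty, omp_outlined_empty, and spawn_empty_region are placeholder names mirroring the IR's %struct.anon, @.omp_outlined., and %omp.outlined.arg.agg.):

    // Even a parallel region with no captures now receives exactly one
    // aggregate argument: the caller allocates an empty struct and passes
    // its address, so the fork call is always made with nargs == 1 instead
    // of a variadic list of captured values.
    struct anon_empty {};                  // mirrors %struct.anon (align 1)

    static void omp_outlined_empty(int *global_tid, int *bound_tid,
                                   anon_empty *ctx) {
      (void)global_tid; (void)bound_tid;
      (void)ctx;                           // nothing to unpack for this region
    }

    void spawn_empty_region() {
      int gtid = 0, btid = 0;
      anon_empty agg{};                    // the %omp.outlined.arg.agg. alloca
      // Stands in for: __kmpc_fork_call(&loc, /*nargs=*/1, ..., &agg);
      omp_outlined_empty(&gtid, &btid, &agg);
    }
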
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP3]]) // CHECK1-NEXT: ret void // // @@ -150,24 +154,24 @@ // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..3) -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..5) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) // CHECK1-NEXT: [[TMP6:%.*]] = call i32 @.omp_task_entry..5(i32 [[TMP0]], ptr [[TMP4]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) // CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..7) -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr @Arg, align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] @@ -181,7 +185,7 @@ // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..9) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_8:%.*]], ptr [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP14]], i64 0 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 0 @@ -224,7 +228,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -258,7 +262,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -292,7 +296,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -326,7 +330,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_8:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -349,30 +353,30 @@ // CHECK1-SAME: (i32 noundef 
[[ARG:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED8:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED8:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR9:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR10:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR16:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR17:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..11) -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..13) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) // CHECK1-NEXT: [[TMP6:%.*]] = call i32 @.omp_task_entry..13(i32 [[TMP0]], ptr [[TMP4]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) // CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..15) -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_13:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_14:%.*]], ptr [[TMP7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] @@ -386,7 +390,7 @@ // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: 
[[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..17) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_16:%.*]], ptr [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 // CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP14]], i64 0 @@ -411,7 +415,7 @@ // CHECK1-NEXT: br label [[OMP_IF_END7]] // CHECK1: omp_if.end7: // CHECK1-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..19) -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], ptr [[TMP23]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], ptr [[TMP23]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 // CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP25]], i64 0 @@ -436,7 +440,7 @@ // CHECK1-NEXT: br label [[OMP_IF_END14]] // CHECK1: omp_if.end14: // CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..21) -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], ptr [[TMP34]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_20:%.*]], ptr [[TMP34]], i32 0, i32 0 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 // CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP36]], i64 0 @@ -478,7 +482,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -512,7 +516,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] 
= getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -546,7 +550,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_13:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_14:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -580,7 +584,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_16:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -614,7 +618,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -648,7 +652,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_20:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp --- a/clang/test/OpenMP/task_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp @@ -68,7 +68,8 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 
-// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -172,32 +173,42 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..11) -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP44]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP45]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = load ptr, ptr [[TMP46]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP47]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP44]]) +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..11) +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP49]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP50]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP52]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK1-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP49]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP49]]) +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP54]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP50]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP55]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP51]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr 
[[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP56]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -455,53 +466,51 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i64 56, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP16]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP12]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP17]], i32 1, i64 56, i64 40, ptr @.omp_task_entry.) 
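The block of getelementptr/load pairs on [[TMP0]] above shows the other half of the convention: the outlined function recovers each captured value from the single __context aggregate rather than from individual trailing parameters. A minimal C++ sketch of the round trip, under the same caveats as before (anon_captures, omp_outlined_packed, and spawn_packed_region are illustrative placeholder names; the real layout is whatever %struct.anon is for the region, reduced here to a pointer plus a VLA length for brevity):

    #include <cstdint>

    // One field per captured entity.
    struct anon_captures {
      int     *a;
      int64_t  vla_len;
    };

    // Outlined body: unpack from the aggregate instead of taking N parameters.
    static void omp_outlined_packed(int *global_tid, int *bound_tid,
                                    anon_captures *ctx) {
      int     *a = ctx->a;        // field 0 of the __context aggregate
      int64_t  n = ctx->vla_len;  // field 1
      (void)global_tid; (void)bound_tid; (void)a; (void)n;
    }

    // Caller: pack the captures into a stack aggregate and forward its address.
    void spawn_packed_region(int *a, int64_t n) {
      int gtid = 0, btid = 0;
      anon_captures agg{a, n};    // the %omp.outlined.arg.agg. alloca
      // Stands in for: __kmpc_fork_call(&loc, /*nargs=*/1, ..., &agg);
      omp_outlined_packed(&gtid, &btid, &agg);
    }
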
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP17]], ptr [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -557,7 +566,7 @@ // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 @@ -568,7 +577,7 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP18]], ptr [[TMP17]], ptr [[TMP16]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP11]], 2 // CHECK1-NEXT: [[TMP23:%.*]] = udiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) @@ -602,7 +611,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 diff --git a/clang/test/OpenMP/taskgroup_codegen.cpp b/clang/test/OpenMP/taskgroup_codegen.cpp --- a/clang/test/OpenMP/taskgroup_codegen.cpp +++ 
b/clang/test/OpenMP/taskgroup_codegen.cpp @@ -89,30 +89,34 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv // CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP3:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8]] +// CHECK1-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP4]]) #[[ATTR8]] // CHECK1-NEXT: unreachable // // @@ -159,31 +163,35 @@ // DEBUG1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv // DEBUG1-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG20:![0-9]+]] { // DEBUG1-NEXT: entry: -// DEBUG1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 0, ptr @.omp_outlined.), !dbg [[DBG21:![0-9]+]] +// DEBUG1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// DEBUG1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG21:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG22:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// DEBUG1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG23:![0-9]+]] { +// DEBUG1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG23:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// DEBUG1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // DEBUG1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// DEBUG1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG24:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG24]] -// DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB5:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG24]] +// DEBUG1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// DEBUG1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG24:![0-9]+]] +// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25:![0-9]+]] +// DEBUG1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG25]] +// DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB5:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG25]] // DEBUG1-NEXT: invoke void @_Z3foov() -// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG25:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG26:![0-9]+]] // DEBUG1: invoke.cont: -// DEBUG1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB5]], i32 [[TMP1]]), !dbg [[DBG25]] -// DEBUG1-NEXT: ret void, !dbg [[DBG26:![0-9]+]] +// DEBUG1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB5]], i32 [[TMP2]]), !dbg [[DBG26]] +// DEBUG1-NEXT: ret void, !dbg [[DBG27:![0-9]+]] // DEBUG1: terminate.lpad: -// DEBUG1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } -// DEBUG1-NEXT: catch ptr null, !dbg [[DBG25]] -// DEBUG1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0, !dbg [[DBG25]] -// DEBUG1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8]], !dbg [[DBG25]] -// DEBUG1-NEXT: unreachable, !dbg [[DBG25]] +// DEBUG1-NEXT: [[TMP3:%.*]] = landingpad { ptr, i32 } +// DEBUG1-NEXT: catch ptr null, !dbg [[DBG26]] +// DEBUG1-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0, !dbg [[DBG26]] +// DEBUG1-NEXT: call void @__clang_call_terminate(ptr [[TMP4]]) #[[ATTR8]], !dbg [[DBG26]] +// DEBUG1-NEXT: unreachable, !dbg [[DBG26]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3foov diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp --- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr 
[[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -433,65 +444,63 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP16]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP12]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP17]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP17]], ptr [[TMP18]], i32 1, ptr [[TMP27]], ptr [[TMP28]], i64 [[TMP31]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -570,7 +579,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -581,7 +590,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // 
CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp --- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -433,65 +444,63 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP16]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP12]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP17]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP17]], ptr [[TMP18]], i32 1, ptr [[TMP27]], ptr [[TMP28]], i64 [[TMP31]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -570,7 +579,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -581,7 +590,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // 
CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -710,24 +710,29 @@ // CHECK1-SAME: (i64 noundef [[COMP:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -735,24 +740,29 @@ // CHECK1-SAME: (i64 noundef [[COMP:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 
[[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -761,28 +771,33 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[LA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[LA]], ptr [[LA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -791,28 +806,33 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[LA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[LA]], ptr [[LA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -824,6 +844,7 @@ // CHECK1-NEXT: [[GBLB_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[GBLA]], ptr [[GBLA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -839,23 +860,27 @@ // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i64 [[TMP3]], [[CONV]] // CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[TMP5]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -865,6 +890,7 @@ // CHECK1-NEXT: [[GBLC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[GBLC]], ptr [[GBLC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 @@ -874,30 +900,35 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 2 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[ADD1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..13, ptr [[COMP_ADDR]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GBLC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[GBLC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[GBLC]], ptr [[GBLC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GBLC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @Gbla, align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -1316,24 +1347,29 @@ // CHECK3-SAME: (i32 noundef [[COMP:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1341,24 +1377,29 @@ // CHECK3-SAME: (i32 noundef [[COMP:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1367,28 +1408,33 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LA]], ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1397,28 +1443,33 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LA]], ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1431,6 +1482,7 @@ // CHECK3-NEXT: [[LC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GBLB1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[GBLA]], ptr [[GBLA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -1449,23 +1501,27 @@ // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i64 [[TMP5]], [[CONV]] // CHECK3-NEXT: [[TMP7:%.*]] = trunc i64 [[ADD2]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[TMP7]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1475,6 +1531,7 @@ // CHECK3-NEXT: [[GBLC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[GBLC]], ptr [[GBLC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 @@ -1484,30 +1541,35 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 2 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[ADD1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..13, ptr [[COMP_ADDR]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GBLC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GBLC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GBLC]], ptr [[GBLC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GBLC_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr @Gbla, align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1672,6 +1734,7 @@ // CHECK9-NEXT: [[GBLA_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[LA_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[GBLA]], ptr [[GBLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[LA]], ptr [[LA_ADDR]], align 8 @@ -1685,23 +1748,27 @@ // CHECK9-NEXT: [[CONV:%.*]] = fptosi float [[TMP4]] to i64 // CHECK9-NEXT: [[TMP5:%.*]] = trunc i64 [[CONV]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 [[TMP5]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK9-NEXT: ret void // // @@ -1711,6 +1778,7 @@ // CHECK9-NEXT: [[LB_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[GBLB_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[LB]], ptr [[LB_ADDR]], align 8 // CHECK9-NEXT: store ptr [[GBLB]], ptr [[GBLB_ADDR]], align 8 @@ -1725,23 +1793,27 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[A]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 [[TMP6]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK9-NEXT: ret void // // @@ -1906,6 +1978,7 @@ // CHECK11-NEXT: [[GBLA_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[LA_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[GBLA]], ptr [[GBLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[LA]], ptr [[LA_ADDR]], align 4 @@ -1919,23 +1992,27 @@ // CHECK11-NEXT: [[CONV:%.*]] = fptosi float [[TMP4]] to i64 // CHECK11-NEXT: [[TMP5:%.*]] = trunc i64 [[CONV]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 [[TMP5]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK11-NEXT: ret void // // @@ -1945,6 +2022,7 @@ // CHECK11-NEXT: [[LB_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[GBLB_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[LB]], ptr [[LB_ADDR]], align 4 // CHECK11-NEXT: store ptr [[GBLB]], ptr [[GBLB_ADDR]], align 4 @@ -1959,23 +2037,27 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[A]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 [[TMP6]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK11-NEXT: ret void // // @@ -2131,6 +2213,7 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 @@ -2138,23 +2221,27 @@ // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 123) -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK17-NEXT: ret void // // @@ -2163,6 +2250,7 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 @@ -2172,23 +2260,27 @@ // CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP2]] to i32 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 123 // CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 456, i32 [[ADD]]) -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK17-NEXT: ret void // // @@ -2344,6 +2436,7 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 @@ -2351,23 +2444,27 @@ // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 123) -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK19-NEXT: ret void // // @@ -2376,6 +2473,7 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 @@ -2385,23 +2483,27 @@ // CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP2]] to i32 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 123 // CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 456, i32 [[ADD]]) -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK19-NEXT: ret void // // @@ -2416,22 +2518,27 @@ // CHECK25-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK25-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK25-NEXT: ret void // // @@ -2439,22 +2546,27 @@ // CHECK25-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK25-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: store ptr null, ptr [[TMP2]], align 8 // CHECK25-NEXT: ret void // // @@ -2462,22 +2574,27 @@ // CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK27-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK27-NEXT: ret void // // @@ -2485,22 +2602,27 @@ // CHECK27-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK27-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: store ptr null, ptr [[TMP2]], align 4 // CHECK27-NEXT: ret void // // @@ -2510,6 +2632,7 @@ // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK33-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK33-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -2517,21 +2640,25 @@ // CHECK33-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK33-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK33-NEXT: ret void // // @@ -2541,6 +2668,7 @@ // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK33-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK33-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -2548,21 +2676,25 @@ // CHECK33-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK33-NEXT: store ptr null, ptr [[TMP2]], align 8 // CHECK33-NEXT: ret void // // @@ -2572,6 +2704,7 @@ // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK35-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK35-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -2579,21 +2712,25 @@ // CHECK35-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK35-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK35-NEXT: ret void // // @@ -2603,6 +2740,7 @@ // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK35-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK35-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -2610,54 +2748,66 @@ // CHECK35-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK35-NEXT: store ptr null, ptr [[TMP2]], align 4 // CHECK35-NEXT: ret void // // // CHECK41-LABEL: define {{[^@]+}}@_Z3foov // CHECK41-SAME: () #[[ATTR0:[0-9]+]] { // CHECK41-NEXT: entry: -// CHECK41-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK41-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK41-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK41-NEXT: ret void // // // CHECK41-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK41-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK41-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK41-NEXT: entry: // CHECK41-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK41-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK41-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK41-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK41-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK41-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK41-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK41-NEXT: ret void // // // CHECK43-LABEL: define {{[^@]+}}@_Z3foov // CHECK43-SAME: () #[[ATTR0:[0-9]+]] { // CHECK43-NEXT: entry: -// CHECK43-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK43-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK43-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK43-NEXT: ret void // // // CHECK43-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK43-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK43-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK43-NEXT: entry: // CHECK43-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK43-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK43-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK43-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK43-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK43-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK43-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK43-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -357,6 +357,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -366,17 +367,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -389,74 +393,76 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr 
[[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -467,20 +473,24 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -493,74 +503,76 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = 
sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -755,6 +767,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -764,17 +777,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -787,73 +803,75 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -864,20 +882,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -890,73 +912,75 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i32 0, i32 
[[TMP19]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1075,23 +1099,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1104,76 +1133,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1292,23 +1323,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1321,75 +1357,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1473,18 +1511,21 @@ // CHECK17-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1494,56 +1535,58 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // @@ -1625,18 +1668,21 @@ // CHECK19-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1646,55 +1692,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // @@ -1819,23 +1867,28 @@ // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1848,76 +1901,78 @@ // CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK25-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK25-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK25-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -2012,6 +2067,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -2020,16 +2076,18 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2039,55 +2097,57 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ 
[[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK25-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK25-NEXT: ret void // // @@ -2212,23 +2272,28 @@ // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2241,75 +2306,77 @@ // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK27-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK27-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -2404,6 +2471,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -2412,16 +2480,18 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2431,54 +2501,56 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK27-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp @@ -170,18 +170,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -193,68 +196,70 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: 
omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -338,18 +343,21 @@ // CHECK3-SAME: (ptr noundef 
[[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -361,66 +369,68 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // 
CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -580,6 +590,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -588,153 +599,161 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to 
i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add 
nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -800,18 +819,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -823,67 +845,69 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // 
@@ -1042,6 +1066,7 @@
 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4
 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
@@ -1050,151 +1075,159 @@
 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]])
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4
+// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4
+// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4
+// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3
+// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4
+// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4
+// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
 // CHECK11-NEXT: ret void
 //
 //
 // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, 
ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw 
i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 
[[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1260,18 +1293,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1283,65 +1319,67 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git 
a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -296,18 +296,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -317,56 +320,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -374,18 +379,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -395,56 +403,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: 
cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -452,18 +462,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -473,73 +486,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -715,18 +730,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -736,55 +754,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -792,18 +812,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -813,55 +836,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -869,18 +894,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -890,72 +918,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add 
nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -1240,23 +1270,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1269,76 +1304,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1350,23 +1387,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1379,76 +1421,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // 
CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1461,7 +1505,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -1469,22 +1513,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1497,95 +1545,99 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label 
[[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP11]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1742,18 +1794,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1763,55 +1818,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1819,18 +1876,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1840,55 +1900,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1896,18 +1958,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1917,72 +1982,74 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// 
CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -2269,23 +2336,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2298,75 +2370,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2378,23 +2452,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2407,75 +2486,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2488,7 +2569,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -2496,22 +2577,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2524,94 +2609,98 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 
+// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2768,18 +2857,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2789,54 +2881,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2844,18 +2938,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2865,54 +2962,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2920,18 +3019,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2941,71 +3043,73 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -350,8 +350,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -360,138 +359,148 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: 
call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 
[[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -712,7 +721,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -721,23 +730,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -745,112 +758,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: 
call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1161,8 +1178,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1171,136 +1187,146 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: 
call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1521,7 +1547,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1530,23 +1556,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1554,110 +1584,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], 
align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: 
[[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -1890,35 +1924,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1930,65 +1962,73 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store 
volatile i32 1, ptr [[TMP15]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp @@ -163,25 +163,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -189,97 +195,99 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 
[[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], 
label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP31]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -321,6 +329,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -334,20 +343,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -355,97 +369,99 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr 
[[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile 
double 1.000000e+00, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr 
[[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP27]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 4 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP31]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -613,6 +629,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -622,21 +639,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -644,31 +667,33 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// 
CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -678,99 +703,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP22]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], 
label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -956,6 +981,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -964,20 +990,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 
-// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -985,28 +1016,30 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1016,97 +1049,97 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // 
CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i64 4, i1 false) // CHECK9-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP23]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP28]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] 
= phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP24]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1316,6 +1349,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr 
[[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1325,21 +1359,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1347,31 +1387,33 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1381,97 +1423,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP22]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 
-// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// 
CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1657,6 +1699,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1665,20 +1708,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1686,28 +1734,30 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store 
ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1717,95 +1767,95 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP15]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP20]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: 
omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP24]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -355,6 +355,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -364,17 +365,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -384,87 +388,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ 
[[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -477,73 +493,77 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP20]], i32 2) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label 
[[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -551,21 +571,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] @@ -578,20 +598,24 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -601,87 +625,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -694,82 +730,86 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// 
CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -962,6 +1002,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -971,17 +1012,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -991,85 +1035,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1082,70 +1138,74 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP20]], i32 2) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: @@ -1153,21 +1213,21 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] @@ -1180,20 +1240,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1203,85 +1267,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: 
[[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1294,79 +1370,83 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: 
omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1485,23 +1565,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1511,90 +1596,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1607,84 +1705,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 
-// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add 
nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1803,23 +1905,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1829,88 +1936,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// 
CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1923,81 +2043,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// 
CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2081,83 +2205,97 @@ // CHECK17-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// 
CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2167,64 +2305,68 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2306,81 +2448,95 @@ // CHECK19-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// 
CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2390,61 +2546,65 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr 
[[A]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2569,23 +2729,28 @@ // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2595,90 +2760,103 @@ // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK25-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK25-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK25-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK25-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK25-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK25-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK25-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK25-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2691,84 +2869,88 @@ // CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], 
align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK25-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK25-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK25-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK25-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK25-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK25-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -2861,6 +3043,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -2869,81 +3052,94 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK25-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK25-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK25-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK25-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2953,63 +3149,67 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK25-NEXT: ret void // // @@ -3134,23 +3334,28 @@ // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3160,88 +3365,101 @@ // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK27-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK27-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK27-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK27-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// 
CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3254,81 +3472,85 @@ // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK27-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// 
CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK27-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: 
omp.inner.for.cond: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK27-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -3421,6 +3643,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -3429,79 +3652,92 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3511,60 +3747,64 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 
[[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK27-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp @@ -175,18 +175,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -194,66 +197,77 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -265,76 +279,80 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 
[[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 
[[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -418,18 +436,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -437,64 +458,75 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -506,72 +538,76 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], 
ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -731,6 +767,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -739,273 +776,298 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // 
CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // 
CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// 
CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: 
[[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = 
sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 
[[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: 
omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1071,18 +1133,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1090,66 +1155,77 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1161,75 +1237,79 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 
[[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -1388,6 +1468,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1396,275 +1477,300 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw 
i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // 
CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP34]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw 
i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp 
sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul 
nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr 
[[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1730,18 +1836,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1749,64 +1858,75 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1818,71 +1938,75 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 
[[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 
+// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp @@ -164,88 +164,104 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -255,66 +271,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -387,88 +407,104 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -478,66 +514,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -626,86 +666,102 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // 
CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP9]], i32 [[TMP10]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -715,63 +771,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -844,86 +904,102 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
+// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i32 [[TMP9]], i32 [[TMP10]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -933,63 +1009,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -1024,88 +1104,104 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // 
CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1113,76 +1209,80 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// 
CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: store ptr [[I]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[I]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP23]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -308,83 +308,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -394,64 +408,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: 
[[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -459,83 +477,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -545,64 +577,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -610,103 +646,117 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -716,64 +766,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -949,81 +1003,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// 
CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1033,61 +1101,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr 
[[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1095,81 +1167,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1179,61 +1265,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 
0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1241,101 +1331,115 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1345,61 +1449,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: 
br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1696,23 +1804,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 
[[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1722,90 +1835,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: 
cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1818,84 +1944,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 
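The CHECK9 updates around this point assert the same calling-convention change as the CHECK3 ones: the caller no longer forwards each capture as a separate vararg to __kmpc_fork_call/__kmpc_fork_teams, it packs them into a single on-stack aggregate and the outlined function takes one __context pointer and recovers every capture with a getelementptr/load pair. A minimal C++ sketch of that packing and unpacking follows; the names omp_anon_ctx, parallel_body, and parallel_caller are illustrative only (they are not part of the patch or the test), and the field layout is simply read off the %struct.anon.0 GEP indices checked here.

// Hypothetical stand-in for %struct.anon.0: field order follows the GEP
// indices in the CHECK9 lines (previous lb/ub by pointer, n by pointer,
// the VLA extent by value, then the array pointer).
struct omp_anon_ctx {
  long long *prev_lb;   // .captured.omp.previous.lb
  long long *prev_ub;   // .captured.omp.previous.ub
  int *n;               // shared trip-count expression
  long long vla;        // VLA extent, passed by value
  int *a;               // array written in the loop body
};

// Sketch of the outlined parallel body: it receives one context pointer and
// unpacks every capture, mirroring the getelementptr/load sequence above.
// The "i < *ctx->n" guard stands in for the precondition and UB clamp that
// the real code performs via the captured n and __kmpc_for_static_init_4.
static void parallel_body(int * /*gtid*/, int * /*btid*/, omp_anon_ctx *ctx) {
  int lb = (int)*ctx->prev_lb;
  int ub = (int)*ctx->prev_ub;
  for (int i = lb; i <= ub && i < *ctx->n; ++i)
    ctx->a[i] = 0;
}

// Sketch of the caller side: one aggregate and one pointer argument replace
// the old varargs list. A direct call stands in for __kmpc_fork_call here.
static void parallel_caller(long long lb, long long ub, int *n, long long vla,
                            int *a) {
  omp_anon_ctx ctx = {&lb, &ub, n, vla, a}; // like %omp.outlined.arg.agg.
  int gtid = 0, btid = 0;
  parallel_body(&gtid, &btid, &ctx);
}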
-// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add 
nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1907,23 +2037,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1933,90 +2068,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store 
i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2029,84 +2177,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 
[[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2120,7 +2272,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2129,22 +2281,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2154,121 +2310,135 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2281,85 +2451,91 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2528,83 +2704,97 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], 
ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2614,63 +2804,67 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2678,83 +2872,97 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label 
[[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2764,63 +2972,67 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// 
CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2830,118 +3042,132 @@ // CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2951,64 +3177,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -3307,23 +3539,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3333,88 +3570,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// 
CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3427,81 +3677,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// 
CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3513,23 +3767,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3539,88 +3798,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3633,81 +3905,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], 
align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 
// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3721,7 +3997,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3730,22 +4006,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3755,119 +4035,133 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], 
[[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3880,82 +4174,88 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 
+// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -4124,81 +4424,95 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4208,60 +4522,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 
[[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4269,81 +4587,95 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4353,60 +4685,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4416,116 +4752,130 @@ // CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4535,61 +4885,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -384,8 +384,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -394,133 +393,156 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], 
align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] 
= phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -560,137 +582,141 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 
[[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 
[[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: 
[[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -698,10 +724,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -713,9 +739,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// 
CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -786,12 +812,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -876,7 +902,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -885,23 +911,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -909,107 +939,125 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void 
@_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1029,16 +1077,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 
dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1046,120 +1090,126 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1179,7 +1229,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1193,7 +1243,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP0]], [[TMP1]] @@ -1211,9 +1261,9 @@ // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1455,8 +1505,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1465,131 +1514,154 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void 
@_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[VAR]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1629,133 +1701,137 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// 
CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: 
store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds 
[2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1763,10 +1839,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1778,9 +1854,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr 
[[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1851,12 +1927,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1941,7 +2017,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1950,23 +2026,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1974,105 +2054,123 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// 
CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2092,16 +2190,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 
dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2109,116 +2203,122 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], 
i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2238,7 +2338,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2252,7 +2352,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2270,9 +2370,9 @@ // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = 
getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2436,35 +2536,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2472,82 +2570,94 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 4 +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca 
i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2556,76 +2666,86 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8 
+// CHECK9-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP21]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -192,77 +192,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -272,139 +284,155 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -414,43 +442,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -458,14 +490,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -611,77 +643,89 @@ // CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -691,43 +735,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -735,96 +783,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l89 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -834,43 +894,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -878,14 +942,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -894,104 +958,119 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// 
CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1001,43 +1080,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -1045,14 +1128,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1196,77 +1279,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1276,43 +1371,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1320,96 +1419,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1419,43 +1530,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1463,14 +1578,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1479,104 +1594,119 @@ // CHECK1-NEXT: entry: // 
CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 
+// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// 
CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1586,43 +1716,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1630,14 +1764,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -201,25 +201,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -227,98 +233,112 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], 
align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP13]], i64 [[TMP15]], ptr [[G2]], ptr [[TMP16]], ptr [[SVAR5]], ptr [[SFVAR6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP21]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP25]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP32]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[TMP33]], align 8 +// CHECK1-NEXT: store volatile double [[TMP34]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP35]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP36]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -326,105 +346,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// 
CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -466,6 +490,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -479,20 +504,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -500,96 +530,110 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 
4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP12]], i32 [[TMP13]], ptr [[G2]], ptr [[TMP14]], ptr [[SVAR5]], ptr [[SFVAR6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP19]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store volatile double [[TMP21]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP23]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP30]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store volatile double [[TMP32]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP34]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -597,103 +641,107 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// 
CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], 
[[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -861,6 +909,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -870,21 +919,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -892,31 +947,36 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// 
CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -926,105 +986,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP17]], ptr [[SVAR7]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], 
[ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: 
arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1032,39 +1102,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 
+// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1074,99 +1148,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 
[[TMP18]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // 
CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1184,10 +1258,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1199,9 +1273,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1272,12 +1346,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1352,6 +1426,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1360,20 +1435,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1381,133 +1461,147 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP16]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done9: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP36]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done11: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1515,136 +1609,140 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], 
i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1664,7 +1762,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1677,7 +1775,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1854,6 +1952,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1863,21 +1962,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1885,31 +1990,36 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// 
CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1919,103 +2029,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP15]], ptr [[SVAR7]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr 
[ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP24]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2023,37 +2143,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2063,97 +2187,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, 
i1 false) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr 
[[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2171,10 +2295,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2186,9 +2310,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2259,12 +2383,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2339,6 +2463,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2347,20 +2472,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2368,131 +2498,145 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP14]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done9: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP23]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP34]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] 
// CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done11: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2500,132 +2644,136 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store 
i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], 
align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 
[[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2645,7 +2793,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2658,7 +2806,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: 
[[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -237,78 +237,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -318,43 +330,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -364,20 +380,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -393,89 +409,102 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -485,43 +514,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -531,20 +564,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -753,78 +786,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -834,43 +879,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -880,98 +929,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: 
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -981,43 +1042,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1027,98 +1092,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: 
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1128,43 +1205,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1174,20 +1255,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: 
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -1196,17 +1277,17 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -1217,77 +1298,90 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1297,43 +1391,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], 
ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1343,20 +1441,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -1531,78 +1629,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1612,43 +1722,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1658,20 +1772,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: 
omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9:[0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9:[0-9]+]] // CHECK5-NEXT: unreachable // // @@ -1687,89 +1801,102 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1779,43 +1906,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1825,20 +1956,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // @@ -2038,78 +2169,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2119,43 +2262,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2165,98 +2312,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2266,43 +2425,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2312,98 +2475,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2413,43 +2588,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2459,20 +2638,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // @@ -2481,17 +2660,17 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK5: invoke.cont: // CHECK5-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK5-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: // CHECK5-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -2502,77 +2681,90 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label 
[[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2582,43 +2774,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], 
ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2628,20 +2824,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: 
omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp @@ -302,29 +302,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 @@ -340,55 +347,61 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -398,12 +411,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], 
i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -418,14 +430,18 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -441,68 +457,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], 
ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -515,10 +531,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -526,9 +542,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 
noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -566,12 +582,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -607,15 +623,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -623,81 +641,92 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void 
@_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -706,12 +735,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -721,97 +749,101 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// 
CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = 
getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -835,7 +867,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -849,7 +881,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1042,29 +1074,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1080,53 +1119,59 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1136,12 +1181,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1156,14 +1200,18 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1177,66 +1225,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1249,10 +1297,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1260,9 +1308,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1300,12 +1348,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr 
[[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1341,15 +1389,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1357,79 +1407,90 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 
1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
-// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1438,12 +1499,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1453,93 +1513,97 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr 
inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1563,7 +1627,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1577,7 +1641,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1738,17 +1802,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1756,68 +1822,78 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1830,74 +1906,78 @@ // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 
+// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store volatile i32 1, 
ptr [[TMP15]], align 4 // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -132,78 +132,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -213,135 +225,151 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l39 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // 
CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -351,57 +379,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -449,78 +481,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -530,57 +574,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp @@ -156,190 +156,208 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -449,190 +467,208 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP21:%.*]] = load 
i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// 
CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -748,186 +784,204 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 
4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 
[[TMP15]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], 
ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1037,186 +1091,204 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr 
[[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// 
CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr 
@.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1283,194 +1355,212 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 
0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 
[[ADD6]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP25]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -57,258 +57,277 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP26:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 
[[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], 
[[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP30]], i64 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP0]], align 4 -// 
CHECK1-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP33]] -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 9 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[LB_ADD_LEN9]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP41:%.*]] = add nuw i64 [[TMP40]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP42]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP50]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: 
store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], 
align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP52]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP53:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP53]], 9 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP54:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP54]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP55]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP58]], i64 [[TMP59]], ptr [[ARGC1]], ptr [[TMP60]]) +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP67]], ptr [[TMP66]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP61]], [[TMP62]] +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP68]], [[TMP69]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP64]]) -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP66]], i32 1) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP70]], ptr [[TMP69]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 -// CHECK1-NEXT: [[TMP73:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP72]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr 
@.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP73]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP71]]) +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP73]], i32 1) +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP77:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP77]], ptr [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 +// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP79]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP80]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP74]], [[TMP75]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP76]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP81]], [[TMP82]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP83]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP77:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP77]] to i32 -// CHECK1-NEXT: [[TMP78:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP78]] to i32 
-// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] -// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 -// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP84]] to i32 +// CHECK1-NEXT: [[TMP85:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i8 [[TMP85]] to i32 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV]], [[CONV15]] +// CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +// CHECK1-NEXT: store i8 [[CONV17]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP76]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP72]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP83]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP79]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP80:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP79]] monotonic, align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP81]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] -// CHECK1: omp.arraycpy.body23: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP82]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP87:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP86]] monotonic, align 4 +// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY21:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP88]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY21]], label [[OMP_ARRAYCPY_DONE34:%.*]], label 
[[OMP_ARRAYCPY_BODY22:%.*]] +// CHECK1: omp.arraycpy.body22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST23:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST24:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP89:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP89]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP83:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP88:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP83]], ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[TMP85:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP85]] to i32 -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] -// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 -// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP87:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP83]], i8 [[TMP86]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP88]] = extractvalue { i8, i1 } [[TMP87]], 0 -// CHECK1-NEXT: [[TMP89:%.*]] = extractvalue { i8, i1 } [[TMP87]], 1 -// CHECK1-NEXT: br i1 [[TMP89]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP90:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY22]] ], [ [[TMP95:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP90]], ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP92]] to i32 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[CONV27]], [[CONV28]] +// CHECK1-NEXT: [[CONV30:%.*]] = trunc i32 [[ADD29]] to i8 +// CHECK1-NEXT: store i8 [[CONV30]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP94:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i8 [[TMP90]], i8 [[TMP93]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP95]] = extractvalue { i8, i1 } [[TMP94]], 0 +// CHECK1-NEXT: [[TMP96:%.*]] = extractvalue { i8, i1 } [[TMP94]], 1 +// CHECK1-NEXT: br i1 [[TMP96]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP81]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] -// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i8, 
ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP88]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY22]] +// CHECK1: omp.arraycpy.done34: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP90:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP90]]) +// CHECK1-NEXT: [[TMP97:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP97]]) // CHECK1-NEXT: ret void // // @@ -396,283 +415,284 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.1], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 0 +// 
CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 8 -// 
CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = sext i32 [[TMP34]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP35]] -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP36]], i64 9 -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP37]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP43:%.*]] = add nuw i64 [[TMP42]], 1 -// CHECK1-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP44]], ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr 
@.red_comb..7, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP51]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP54]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP55:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP55]], 9 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP37]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP39]], i64 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP42]] +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP43]], i64 9 +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP44]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = add nuw i64 [[TMP49]], 1 +// CHECK1-NEXT: [[TMP51:%.*]] = mul nuw i64 [[TMP50]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP58]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP59]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP61]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP62]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: 
br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP56]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP63]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP57]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP58]], [[TMP59]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP60]], 1 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP67]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP66]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP68]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[TMP69]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP70]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP67]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP75]], ptr [[TMP67]]) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: [[TMP74:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP73]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP77]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP82]], ptr [[TMP74]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP77:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP77]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP79]]) -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP81]], i32 1) -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP85]], ptr [[TMP84]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: [[TMP88:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP88]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP89]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP20]] to ptr +// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 +// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP89]], [[TMP90]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP93]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ 
[[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP87]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP94]] monotonic, align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP97:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP97]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP101]] monotonic, align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], 
[[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP104]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP98:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP103:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP98]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP102:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP98]], i8 [[TMP101]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP103]] = extractvalue { i8, i1 } [[TMP102]], 0 -// CHECK1-NEXT: [[TMP104:%.*]] = extractvalue { i8, i1 } [[TMP102]], 1 -// CHECK1-NEXT: br i1 [[TMP104]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 +// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 +// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP96]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP105]]) +// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP112]]) // CHECK1-NEXT: ret void // // @@ -811,20 +831,20 @@ // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 @@ -838,7 +858,7 @@ // CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -467,83 +467,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca 
ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -553,64 +567,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: 
[[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -618,83 +636,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -704,64 +736,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -769,83 +805,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -855,85 +905,89 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -941,83 +995,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1027,55 +1095,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] 
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1090,83 +1162,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1176,55 +1262,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] 
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1501,81 +1591,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1585,61 +1689,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr 
[[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1647,81 +1755,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1731,61 +1853,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 
0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1793,81 +1919,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1877,80 +2017,84 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1958,81 +2102,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2042,52 +2200,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2102,81 +2264,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2186,52 +2362,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2508,83 +2688,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2594,64 +2788,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: 
[[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2659,83 +2857,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2745,64 +2957,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2810,83 +3026,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2896,85 +3126,89 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2982,83 +3216,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 
8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], 
ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3068,55 +3316,59 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: 
[[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3131,83 +3383,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3217,55 +3483,59 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// 
CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3542,81 +3812,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3626,61 +3910,65 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr 
[[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3688,81 +3976,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3772,61 +4074,65 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 
0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3834,81 +4140,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3918,80 +4238,84 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3999,81 +4323,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4083,52 +4421,56 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4143,81 +4485,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // 
CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4227,52 +4583,56 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 
[[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4746,23 +5106,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4772,90 +5137,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4868,84 +5246,88 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], 
align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4957,23 +5339,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4983,90 +5370,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5079,84 +5479,88 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], 
align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: 
[[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -5170,7 +5574,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5179,22 +5583,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5204,121 +5612,135 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], 
[[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK13-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP35:%.*]] = load i32, 
ptr [[TMP34]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5331,85 +5753,91 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -5421,23 +5849,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5447,90 +5880,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5543,73 +5989,77 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 
1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, 
ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5630,7 +6080,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5639,22 +6089,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5664,96 +6118,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5766,75 +6234,81 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 
-// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 
// CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // 
CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6112,83 +6586,97 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6198,63 +6686,67 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6262,83 +6754,97 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6348,63 +6854,67 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6414,97 +6924,111 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6514,86 +7038,92 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = 
sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -6601,83 +7131,97 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6687,54 +7231,58 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = 
add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6751,97 +7299,111 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6851,56 +7413,62 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, 
i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7378,23 +7946,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7404,88 +7977,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7498,81 +8084,85 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], 
align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], 
align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7584,23 +8174,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7610,88 +8205,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7704,81 +8312,85 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], 
align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 
// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7792,7 +8404,7 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -7801,22 +8413,26 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7826,119 +8442,133 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], 
[[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// 
CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7951,82 +8581,88 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 
+// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -8038,23 +8674,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8064,88 +8705,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8158,70 +8812,74 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// 
CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8242,7 +8900,7 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -8251,22 +8909,26 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8276,94 +8938,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK15-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8376,72 +9052,78 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 
-// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], 
align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8719,81 +9401,95 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8803,60 +9499,64 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8864,81 +9564,95 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8948,60 +9662,64 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -9011,95 +9729,109 @@ // CHECK15-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -9109,81 +9841,87 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -9191,81 +9929,95 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9275,51 +10027,55 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP22]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9336,95 +10092,109 @@ // CHECK15-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -9434,53 +10204,59 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9954,23 +10730,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr 
[[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9980,90 +10761,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], 
align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10076,84 +10870,88 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10165,23 +10963,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10191,90 +10994,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10287,84 +11103,88 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10378,7 +11198,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10387,22 +11207,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10412,121 +11236,135 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], 
[[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK17-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP35:%.*]] = load i32, 
ptr [[TMP34]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10539,85 +11377,91 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10629,23 +11473,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10655,90 +11504,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10751,73 +11613,77 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 
35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], 
align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -10838,7 +11704,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10847,22 +11713,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10872,96 +11742,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10974,75 +11858,81 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 
8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 
4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11320,83 +12210,97 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11406,63 +12310,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11470,83 +12378,97 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11556,63 +12478,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11622,97 +12548,111 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11722,86 +12662,92 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = 
sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK17-NEXT: ret void // // @@ -11809,83 +12755,97 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11895,54 +12855,58 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 
0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11959,97 +12923,111 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12059,56 +13037,62 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], 
i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // 
CHECK17: omp.inner.for.end: @@ -12586,23 +13570,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12612,88 +13601,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], 
align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: 
cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12706,81 +13708,85 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store 
i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12792,23 +13798,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12818,88 +13829,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12912,81 +13936,85 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr 
[[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -13000,7 +14028,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -13009,22 +14037,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13034,119 +14066,133 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], 
[[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// 
CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13159,82 +14205,88 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -13246,23 +14298,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13272,88 +14329,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13366,70 +14436,74 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// 
CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP16:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13450,7 +14524,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -13459,22 +14533,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13484,94 +14562,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// 
CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13584,72 +14676,78 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13927,81 +15025,95 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14011,60 +15123,64 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -14072,81 +15188,95 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14156,60 +15286,64 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// 
CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -14219,95 +15353,109 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -14317,81 +15465,87 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK19-NEXT: ret void // // @@ -14399,81 +15553,95 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14483,51 +15651,55 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14544,95 +15716,109 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -14642,53 +15828,59 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -372,6 +372,7 @@ // 
CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -381,17 +382,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -401,79 +405,94 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi 
i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -486,14 +505,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -506,88 +522,92 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 
[[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP31]], 0 // CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -605,112 +625,134 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[I4]], ptr [[TMP1]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[I3]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -719,129 +761,129 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca 
ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: 
store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP17]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[TMP6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: @@ -1048,6 +1090,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -1057,17 +1100,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1077,77 +1123,92 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub 
nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp 
sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1160,14 +1221,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1180,85 +1238,89 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1276,110 +1338,132 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[I4]], ptr [[TMP1]], ptr [[TMP2]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1388,126 +1472,126 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca 
ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: 
store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP10]], i32 0, i32 [[TMP30]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK3-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[TMP6]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: @@ -1909,116 +1993,141 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] +// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i64 16) ] // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[I4]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[I3]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -2027,131 +2136,130 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i64 16) ] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP2]], align 8 
+// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP19]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK9-NEXT: 
[[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK9-NEXT: store i32 [[ADD12]], ptr [[TMP6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: @@ -2285,114 +2393,139 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] +// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i32 16) ] // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[I4]], ptr [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[I3]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -2401,128 +2534,127 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i32 16) ] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 [[TMP23]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 [[TMP32]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // 
CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK11-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK11-NEXT: store i32 [[ADD11]], ptr [[TMP6]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: @@ -2785,98 +2917,114 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 122 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], ptr [[I1]], ptr [[TMP1]]) +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[I]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK17-NEXT: store i32 123, ptr [[TMP2]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef 
nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2884,86 +3032,90 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP8]], i32 0, i32 0 // CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTLINEAR_START]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = 
trunc i64 [[TMP3]] to i32 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// 
CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[I2]], align 4 -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I2]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A5]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP8]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A4]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK17-NEXT: store i32 123, ptr [[TMP6]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: @@ -3060,96 +3212,112 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 
// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // 
CHECK19-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 122 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i32 [[TMP9]], i32 [[TMP10]], ptr [[I1]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[I]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK19-NEXT: store i32 123, ptr [[TMP2]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, 
align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -3157,83 +3325,87 @@ // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I1:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP8]], i32 0, i32 0 // CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTLINEAR_START]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: 
store i32 [[TMP10]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[I1]], align 4 -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP1]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I1]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [123 x i32], ptr [[A4]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK19-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP8]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A3]], i32 0, i32 [[TMP20]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK19-NEXT: store i32 123, ptr [[TMP6]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: @@ -3498,116 +3670,141 @@ // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK25-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK25-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: -// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] +// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i64 16) ] // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK25-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[I4]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK25-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK25-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[I3]], ptr [[TMP28]], align 8 +// CHECK25-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK25-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK25-NEXT: store i64 [[TMP6]], ptr [[TMP30]], align 8 +// CHECK25-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK25-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK25-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK25-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK25-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK25-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // 
CHECK25: .omp.final.then: -// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK25-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK25-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK25-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK25-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK25: .omp.final.done: // CHECK25-NEXT: br label [[OMP_PRECOND_END]] @@ -3616,131 +3813,130 @@ // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] 
= load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK25-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: -// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i64 16) ] +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK25-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK25-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 
+// CHECK25-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK25-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP19]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK25-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK25-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK25-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK25-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK25-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK25-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK25-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK25-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK25-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK25-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK25-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: -// CHECK25-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK25-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK25-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK25-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK25-NEXT: store i32 [[ADD13]], ptr 
[[TMP0]], align 4 +// CHECK25-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK25-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK25-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK25-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK25-NEXT: store i32 [[ADD12]], ptr [[TMP6]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK25: .omp.final.done: -// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK25-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK25-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK25-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK25: .omp.linear.pu: // CHECK25-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK25: .omp.linear.pu.done: @@ -3836,6 +4032,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) // CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -3844,73 +4041,88 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK25-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK25-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK25-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: // CHECK25-NEXT: store i32 10, ptr [[I]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3919,13 +4131,11 @@ // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3935,66 +4145,70 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] 
-// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK25-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK25-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: // CHECK25-NEXT: store i32 10, ptr [[I]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4135,114 +4349,139 @@ // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[I]], ptr 
[[I_ADDR]], align 4 // CHECK27-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: -// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] +// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i32 16) ] // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// 
CHECK27-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK27-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[I4]], ptr [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[I3]], ptr [[TMP26]], align 4 +// CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK27-NEXT: store i32 [[TMP6]], ptr [[TMP28]], align 4 +// CHECK27-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK27-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK27-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK27-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK27-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: -// CHECK27-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: 
[[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK27-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK27-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK27-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK27-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK27: .omp.final.done: // CHECK27-NEXT: br label [[OMP_PRECOND_END]] @@ -4251,128 +4490,127 @@ // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// 
CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK27-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: -// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i32 16) ] +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK27-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] 
= load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK27-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK27-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK27-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK27-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK27-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK27-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK27-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK27-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 [[TMP23]] +// CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK27-NEXT: [[TMP32:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 [[TMP32]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK27-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK27-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK27-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK27-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK27-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: -// CHECK27-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK27-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK27-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK27-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK27-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK27-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK27-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK27-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK27-NEXT: store i32 [[ADD11]], ptr [[TMP6]], align 4 // CHECK27-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK27: .omp.final.done: -// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK27-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK27-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK27-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK27: .omp.linear.pu: // CHECK27-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK27: .omp.linear.pu.done: @@ -4468,6 +4706,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -4476,71 +4715,86 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP12]] // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK27-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK27-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: // CHECK27-NEXT: store i32 10, ptr [[I]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4549,13 +4803,11 @@ // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4565,63 +4817,67 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] 
= icmp sgt i32 [[TMP11]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK27-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK27-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK27-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: // CHECK27-NEXT: store i32 10, ptr [[I]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -180,18 +180,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -199,58 +202,71 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -260,13 +276,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -278,79 +292,83 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -439,18 +457,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -458,56 +479,69 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -517,13 +551,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -535,75 +567,79 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: 
omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -908,6 +944,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -916,133 +953,160 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 
+// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr 
[[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK9-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK9-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label 
[[OMP_PRECOND_END]] @@ -1051,172 +1115,170 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 
[[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: 
[[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 
[[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP37]], 0 -// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 -// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1284,18 +1346,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1303,58 +1368,71 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1364,13 +1442,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1382,78 +1458,82 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1617,6 +1697,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1625,135 +1706,162 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 
0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr 
[[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP34]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP36]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP31]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP32]], 0 -// CHECK11-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK11-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK11-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK11-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP45]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: 
.omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1762,172 +1870,170 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] 
], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD39:%.*]] 
= add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP37]], 0 -// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 -// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 -// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] -// CHECK11-NEXT: store i32 [[ADD43]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK11-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 -// CHECK11-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 -// CHECK11-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] -// CHECK11-NEXT: store i32 [[ADD47]], ptr [[J14]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 +// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 +// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] +// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 +// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 +// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1995,18 +2101,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2014,56 +2123,69 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -2073,13 +2195,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2091,74 +2211,78 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -311,75 +311,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -388,13 +404,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -404,67 +418,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -476,75 +494,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -553,13 +587,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -569,67 +601,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -641,95 +677,111 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -738,13 +790,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -754,67 +804,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -994,73 +1048,89 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1069,13 +1139,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1085,64 +1153,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1154,73 +1226,89 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1229,13 +1317,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1245,64 +1331,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1314,93 +1404,109 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1409,13 +1515,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1425,64 +1529,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] 
-// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2030,23 +2138,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2056,81 +2169,98 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2143,15 +2273,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 
// CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2164,90 +2290,94 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2265,23 +2395,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 
[[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2291,81 +2426,98 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 
+// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2378,15 +2530,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2399,90 +2547,94 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2502,7 +2654,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2511,22 +2663,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2536,111 +2692,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // 
CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP24]] -// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], 
[[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK9-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -2653,16 +2827,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = 
alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2675,91 +2845,97 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -2934,75 +3110,91 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 
dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3011,13 +3203,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3027,66 +3217,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3098,75 +3292,91 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], 
align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3175,13 +3385,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3191,66 +3399,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3264,109 +3476,125 @@ // CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, 
!llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3375,14 +3603,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3392,67 +3618,73 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3755,23 +3987,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3781,79 +4018,96 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3866,15 +4120,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3887,87 +4137,91 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3985,23 +4239,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // 
CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4011,79 +4270,96 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // 
CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -4096,15 +4372,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4117,87 +4389,91 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: 
omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -4217,7 +4493,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // 
CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4226,22 +4502,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4251,109 +4531,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], 
[[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -4366,16 +4664,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4388,88 +4682,94 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -4644,73 +4944,89 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 
dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4719,13 +5035,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4735,63 +5049,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] 
= icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4803,73 +5121,89 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: 
store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4878,13 +5212,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4894,63 +5226,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] 
= icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4964,107 +5300,123 @@ // CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5073,14 +5425,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, 
align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5090,64 +5440,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // 
CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -387,8 +387,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -397,140 +396,163 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], 
align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] 
= phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label 
[[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -570,144 +592,148 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], 
align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 
[[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -715,10 +741,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -730,9 +756,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -803,12 +829,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -893,7 +919,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -902,23 +928,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -926,114 +956,132 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 8, 
!llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], 
i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1053,16 +1101,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1070,127 +1114,133 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 
4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1210,7 +1260,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1224,7 +1274,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1242,9 +1292,9 @@ // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1486,8 +1536,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1496,138 +1545,161 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) 
[[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[VAR]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label 
[[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1667,140 +1739,144 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], 
align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x 
%struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1808,10 +1884,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1823,9 +1899,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1896,12 +1972,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1986,7 +2062,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1995,23 +2071,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2019,112 +2099,130 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = 
icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2144,16 +2242,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2161,123 +2255,129 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // 
CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 
4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2297,7 +2397,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2311,7 +2411,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2329,9 +2429,9 @@ // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -3141,35 +3241,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// 
CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3177,72 +3275,85 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] 
-// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr @g, align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3251,15 +3362,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3268,79 +3378,89 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
[[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -189,70 +189,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -261,12 +274,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -276,60 +288,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 
1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -340,75 +356,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -417,12 +446,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -432,43 +460,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, 
ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -476,17 +508,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -636,70 +668,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -708,12 +753,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -723,43 +767,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 
1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -767,17 +815,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -788,75 +836,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -865,12 +926,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -880,43 +940,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, 
ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -924,17 +988,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -947,97 +1011,113 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1046,12 +1126,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1061,43 +1140,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -1105,17 +1188,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1263,70 +1346,83 @@ // CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: 
cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1335,12 +1431,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1350,43 +1445,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -1394,17 +1493,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1415,75 +1514,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1492,12 +1604,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1507,43 +1618,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store 
i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -1551,17 +1666,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1574,97 +1689,113 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1673,12 +1804,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] 
{ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1688,43 +1818,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -1732,17 +1866,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1836,70 +1970,83 @@ // CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1908,12 +2055,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1923,60 +2069,64 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1987,75 +2137,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2064,12 +2227,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2079,43 +2241,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 
99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -2123,17 +2289,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2283,70 +2449,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2355,12 +2534,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2370,43 +2548,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -2414,17 +2596,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2435,75 +2617,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2512,12 +2707,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2527,43 +2721,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2571,17 +2769,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// 
CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2594,151 +2792,172 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK3: omp_if.then4: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3: omp_if.then6: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP20]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK3: omp_if.else5: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK3: omp.inner.for.cond6: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK3: omp.inner.for.body8: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK3-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// 
CHECK3-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]] -// CHECK3: omp_if.then13: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK3-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK3: omp_if.else14: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_IF_END16]] -// CHECK3: omp_if.end16: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK3: omp.inner.for.inc17: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP38:![0-9]+]] -// CHECK3: omp.inner.for.end19: -// CHECK3-NEXT: br label [[OMP_IF_END20]] -// CHECK3: omp_if.end20: +// CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK3: omp_if.else7: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK3: omp.inner.for.cond8: +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK3: omp.inner.for.body10: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK3-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK3-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK3: omp_if.then15: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK3: omp_if.else16: +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP34]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: br label [[OMP_IF_END18]] +// CHECK3: omp_if.end18: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK3: omp.inner.for.inc19: +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK3: omp.inner.for.end21: +// CHECK3-NEXT: br label [[OMP_IF_END22]] +// CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2747,13 +2966,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2763,48 +2981,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -2812,60 +3038,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label 
[[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = 
add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2874,13 +3100,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2890,48 +3115,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -2939,60 +3172,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label 
[[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: 
[[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3140,70 +3373,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3212,12 +3458,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3227,43 +3472,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -3271,17 +3520,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3292,75 +3541,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3369,12 +3631,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3384,43 +3645,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 
1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3428,17 +3693,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3451,97 +3716,113 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, i64 [[TMP2]]) +// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// 
CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3550,12 +3831,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] 
{ +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3565,43 +3845,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -3609,17 +3893,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4304,70 +4588,83 @@ // CHECK9-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK9-SAME: () #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // 
CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4376,12 +4673,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4391,60 +4687,64 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4455,75 +4755,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4532,12 +4845,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4547,43 +4859,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 
99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -4591,17 +4907,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4751,70 +5067,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4823,12 +5152,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4838,43 +5166,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -4882,17 +5214,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4903,75 +5235,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4980,12 +5325,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4995,43 +5339,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 
99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -5039,17 +5387,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5062,97 +5410,113 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK9-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5161,12 +5525,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5176,43 +5539,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -5220,17 +5587,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5378,70 +5745,83 @@ // CHECK9-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: 
cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5450,12 +5830,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5465,43 +5844,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -5509,17 +5892,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5530,75 +5913,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5607,12 +6003,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5622,43 +6017,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store 
i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -5666,17 +6065,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5689,97 +6088,113 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK9-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP2]]) +// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5788,12 +6203,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] 
{ +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5803,43 +6217,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -5847,17 +6265,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5951,70 +6369,83 @@ // CHECK11-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK11-SAME: () #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
// CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6023,12 +6454,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6038,60 +6468,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6102,75 +6536,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 
[[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6179,12 +6626,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6194,43 +6640,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -6238,17 +6688,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6398,70 +6848,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 
4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6470,12 +6933,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6485,43 +6947,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -6529,17 +6995,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6550,75 +7016,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // 
CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6627,12 +7106,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6642,43 +7120,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 
0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -6686,17 +7168,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6709,151 +7191,172 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] // CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK11: omp_if.then4: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK11-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK11-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11: omp_if.then6: +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP20]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK11: omp_if.else5: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK11: omp.inner.for.cond6: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK11: omp.inner.for.body8: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK11-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: 
[[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]] -// CHECK11: omp_if.then13: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK11-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK11: omp_if.else14: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: br label [[OMP_IF_END16]] -// CHECK11: omp_if.end16: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK11: omp.inner.for.inc17: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP38:![0-9]+]] -// CHECK11: omp.inner.for.end19: -// CHECK11-NEXT: br label [[OMP_IF_END20]] -// CHECK11: omp_if.end20: +// CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK11: omp_if.else7: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK11: omp.inner.for.cond8: +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK11: omp.inner.for.body10: +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK11-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK11-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK11: omp_if.then15: +// CHECK11-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK11: omp_if.else16: +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP34]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: br label [[OMP_IF_END18]] +// CHECK11: omp_if.end18: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK11: omp.inner.for.inc19: +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK11: omp.inner.for.end21: +// CHECK11-NEXT: br label [[OMP_IF_END22]] +// CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK11-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6862,13 +7365,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6878,48 +7380,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -6927,60 +7437,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// 
CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6989,13 +7499,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7005,48 +7514,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -7054,60 +7571,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK11: omp.inner.for.end18: // 
CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7255,70 +7772,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7327,12 +7857,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca 
ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7342,43 +7871,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -7386,17 +7919,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7407,75 +7940,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7484,12 +8030,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7499,43 +8044,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 
[[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -7543,17 +8092,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 
[[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7566,97 +8115,113 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, i64 [[TMP2]]) +// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // 
CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK11-NEXT: br i1 
[[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7665,12 +8230,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7680,43 +8244,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -7724,17 +8292,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -210,25 +210,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -236,105 +242,119 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: 
[[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP13]], i64 [[TMP15]], ptr [[G2]], ptr [[TMP16]], ptr [[SVAR5]], ptr [[SFVAR6]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP27]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP34]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load double, ptr [[TMP35]], align 8 +// CHECK1-NEXT: store volatile double [[TMP36]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP37]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP38]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br 
label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -342,112 +362,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group 
[[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 8 -// CHECK1-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 
8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -489,6 +513,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -502,20 +527,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -523,103 +553,117 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP12]], i32 [[TMP13]], ptr [[G2]], ptr [[TMP14]], ptr [[SVAR5]], ptr [[SFVAR6]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP21]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 4 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP25]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP32]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[TMP33]], align 4 +// CHECK3-NEXT: store volatile double [[TMP34]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP35]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP36]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br 
label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -627,110 +671,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], 
i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 4 -// CHECK3-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 
[[TMP29]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -934,6 +982,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -943,21 +992,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -965,31 +1020,36 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -999,112 +1059,122 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP17]], ptr [[SVAR7]]), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP39]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP40]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP28]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP41]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP42]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1112,39 +1182,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1154,106 +1228,106 @@ // 
CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 
0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1271,10 +1345,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = 
alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1286,9 +1360,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1359,12 +1433,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1439,6 +1513,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1447,20 +1522,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1468,140 +1548,154 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr 
[[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: 
[[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done9: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP38]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// 
CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done11: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1609,143 +1703,147 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds 
[[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1765,7 +1863,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] 
= load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1778,7 +1876,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1955,6 +2053,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1964,21 +2063,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1986,31 +2091,36 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2020,110 +2130,120 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP15]], ptr [[SVAR7]]), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) 
+// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP40]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2131,37 +2251,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: 
arrayctor.loop: @@ -2171,104 +2295,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2286,10 +2410,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], 
align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2301,9 +2425,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2374,12 +2498,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2454,6 +2578,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2462,20 +2587,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2483,138 +2613,152 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
-// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP14]]), !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, 
i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done9: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP36]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], 
[ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done11: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2622,139 +2766,143 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: 
[[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2774,7 +2922,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: 
[[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2787,7 +2935,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -239,71 +239,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -312,12 +325,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -327,43 +339,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -373,27 +389,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { 
ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: unreachable // // @@ -409,82 +425,96 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -493,12 +523,11 @@ // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -508,43 +537,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -554,27 +587,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 
[[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: unreachable // // @@ -783,71 +816,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -856,12 +902,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias 
noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -871,43 +916,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -917,98 +966,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { 
ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1017,12 +1079,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1032,43 +1093,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1078,98 +1143,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] 
= landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1178,12 +1256,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias 
noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1193,43 +1270,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1239,27 +1320,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { 
ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: unreachable // // @@ -1268,17 +1349,17 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -1289,70 +1370,84 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1361,12 +1456,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1376,43 +1470,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1422,27 +1520,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: unreachable // // @@ -1966,71 +2064,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// 
CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2039,12 +2150,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2054,43 +2164,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2100,27 +2214,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = 
landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: unreachable // // @@ -2136,82 +2250,96 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// 
CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2220,12 +2348,11 @@ // // // 
CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2235,43 +2362,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2281,27 +2412,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// 
CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: unreachable // // @@ -2501,71 +2632,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2574,12 +2718,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias 
noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2589,43 +2732,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2635,98 +2782,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { 
ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2735,12 +2895,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2750,43 +2909,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2796,98 +2959,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] 
= landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2896,12 +3072,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias 
noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2911,43 +3086,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2957,27 +3136,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { 
ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: unreachable // // @@ -2986,17 +3165,17 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK5: invoke.cont: // CHECK5-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK5-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: // CHECK5-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -3007,70 +3186,84 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label 
[[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3079,12 +3272,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3094,43 +3286,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -3140,27 +3336,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp @@ -304,29 +304,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -342,62 +349,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], 
label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] 
= getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -407,12 +420,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -427,14 +439,18 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ 
-450,75 +466,75 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr 
[[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -531,10 +547,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -542,9 +558,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -582,12 +598,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -623,15 +639,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -639,88 +657,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP14]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -729,12 +758,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -744,104 +772,108 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: 
[[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 
[[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -865,7 +897,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -879,7 +911,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1072,29 +1104,36 @@ // CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1110,60 +1149,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1173,12 +1218,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1193,14 +1237,18 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1214,73 +1262,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 
[[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] 
// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1293,10 +1341,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1304,9 +1352,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1344,12 +1392,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1385,15 +1433,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1401,86 +1451,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = 
icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: 
[[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1489,12 +1550,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1504,100 +1564,104 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: 
store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1621,7 +1685,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr 
[[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1635,7 +1699,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2548,17 +2612,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2566,61 +2632,72 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2629,12 +2706,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2647,77 +2723,81 @@ // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // 
CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -134,71 +134,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -207,12 +220,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -222,60 +234,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -286,71 +302,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l39 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -359,12 +388,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -374,60 +402,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -479,71 +511,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -552,12 +597,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -567,60 +611,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -160,204 +160,222 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 
[[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label 
[[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -467,204 +485,222 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 
[[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], 
label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -780,200 +816,218 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4
+// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ]
// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
-// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
-// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
-// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]]
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
+// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK3: omp.inner.for.body:
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]]
-// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]]
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]]
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]]
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
-// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]])
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK3: .omp.final.then:
// CHECK3-NEXT: store i32 2, ptr [[I]], align 4
// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK3: .omp.final.done:
-// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
-// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
// CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
// CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
// CHECK3-NEXT: ]
// CHECK3: .omp.reduction.case1:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4
-// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
+// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4
+// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.case2:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4
// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK3: .omp.reduction.default:
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4
+// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 
[[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // 
CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1083,200 +1137,218 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 
[[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: 
i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1541,208 +1613,226 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i32 
[[ADD2]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 
@__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP27]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -477,75 +477,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -554,13 +570,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -570,67 +584,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -642,75 +660,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -719,13 +753,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -735,67 +767,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -807,75 +843,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -884,13 +936,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -900,88 +950,92 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -993,75 +1047,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1070,13 +1140,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1086,55 +1154,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1142,9 +1214,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1156,75 +1228,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1233,13 +1321,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1249,55 +1335,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1305,9 +1395,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1581,75 +1671,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1658,13 +1764,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1674,67 +1778,71 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1746,75 +1854,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1823,13 +1947,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1839,67 +1961,71 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1911,75 +2037,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1988,13 +2130,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2004,88 +2144,92 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK2-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2097,75 +2241,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // 
CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2174,13 +2334,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2190,55 +2348,59 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2246,9 +2408,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2260,75 +2422,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2337,13 +2515,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2353,55 +2529,59 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2409,9 +2589,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2685,73 +2865,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2760,13 +2956,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2776,64 +2970,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2845,73 +3043,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2920,13 +3134,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2936,64 +3148,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3005,73 +3221,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3080,13 +3312,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3096,83 +3326,87 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 
[[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3184,73 +3418,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3259,13 +3509,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3275,52 +3523,56 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3328,9 +3580,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3342,73 +3594,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // 
CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] // CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3417,13 +3685,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3433,52 +3699,56 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3486,9 +3756,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3762,73 +4032,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label 
[[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3837,13 +4123,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3853,64 +4137,68 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK6-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK6-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK6-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3922,73 +4210,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3997,13 +4301,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4013,64 +4315,68 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK6-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK6-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK6-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4082,73 +4388,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4157,13 +4479,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4173,83 +4493,87 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK6-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK6-NEXT: store i32 
[[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK6-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK6-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK6-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4261,73 +4585,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // 
CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4336,13 +4676,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4352,52 +4690,56 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK6-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4405,9 +4747,9 @@ // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK6-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4419,73 +4761,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // 
CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4494,13 +4852,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4510,52 +4866,56 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK6-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4563,9 +4923,9 @@ // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK6-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5421,23 +5781,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5447,81 +5812,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5534,15 +5916,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5555,90 +5933,94 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], 
[[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5656,23 +6038,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = 
alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5682,81 +6069,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label 
[[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5769,15 +6173,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5790,90 +6190,94 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // 
CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5893,7 +6297,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = 
alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5902,22 +6306,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5927,111 +6335,129 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], 
[[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], 
[[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK13-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK13-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK13-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -6044,16 +6470,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // 
CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6066,91 +6488,97 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: 
[[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // 
CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -6168,23 +6596,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6194,81 +6627,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] 
= icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6281,15 +6731,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6302,73 +6748,77 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6376,12 +6826,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -6401,7 +6851,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -6410,22 +6860,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6435,86 +6889,104 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 
8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]), !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK13-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK13-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6527,16 +6999,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: 
[[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6549,75 +7017,81 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 
[[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) -// CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) +// CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], 
ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6625,12 +7099,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // 
CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -6907,75 +7381,91 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6984,13 +7474,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7000,66 +7488,70 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: 
omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7071,75 +7563,91 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7148,13 +7656,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7164,66 +7670,70 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: 
omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7237,88 +7747,104 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7327,14 +7853,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7344,89 +7868,95 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store 
i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: 
br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7438,75 +7968,91 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7515,13 +8061,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7531,54 +8075,58 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] -// 
CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7586,9 +8134,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7602,88 +8150,104 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// 
CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP70]] -// 
CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7692,14 +8256,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7709,56 +8271,62 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store 
i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7766,9 +8334,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: 
.omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8239,23 +8807,28 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8265,81 +8838,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 
[[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8352,15 +8942,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8373,90 +8959,94 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], 
[[TMP25]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK14-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -8474,23 +9064,28 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = 
alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8500,81 +9095,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label 
[[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8587,15 +9199,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8608,90 +9216,94 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // 
CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK14-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -8711,7 +9323,7 @@ // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = 
alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -8720,22 +9332,26 @@ // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8745,111 +9361,129 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], 
[[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK14-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK14-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK14-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK14-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK14-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK14-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK14-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK14: cond.true11: -// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[COND_END13:%.*]] // CHECK14: cond.false12: -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[COND_END13]] // CHECK14: cond.end13: -// CHECK14-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], 
[[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK14-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK14-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK14-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK14-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) +// CHECK14-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK14-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK14-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK14-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK14-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -8862,16 +9496,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // 
CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8884,91 +9514,97 @@ // CHECK14-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK14-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: 
[[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK14-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // 
CHECK14-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK14-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -8986,23 +9622,28 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9012,81 +9653,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] 
= icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -9099,15 +9757,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9120,73 +9774,77 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK14-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK14-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -9194,12 +9852,12 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK14-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK14-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK14-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK14-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -9219,7 +9877,7 @@ // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -9228,22 +9886,26 @@ // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 
// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9253,86 +9915,104 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 
[[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP13:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]), !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK14-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK14-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK14-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK14-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -9345,16 +10025,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: 
[[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9367,75 +10043,81 @@ // CHECK14-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK14-NEXT: br i1 
[[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK14-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK14-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK14-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // 
CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -9443,12 +10125,12 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw 
i32 [[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -9725,75 +10407,91 @@ // CHECK14-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9802,13 +10500,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9818,66 +10514,70 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK14-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK14: 
omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK14-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9889,75 +10589,91 @@ // CHECK14-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9966,13 +10682,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9982,66 +10696,70 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK14-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK14: 
omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK14-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10055,88 +10773,104 @@ // CHECK14-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK14-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK14-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10145,14 +10879,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10162,89 +10894,95 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: 
store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK14-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP8:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // 
CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK14-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK14-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK14-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK14-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10256,75 +10994,91 @@ // CHECK14-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10333,13 +11087,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10349,54 +11101,58 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK14-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP67]] -// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK14-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -10404,9 +11160,9 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK14-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10420,88 +11176,104 @@ // CHECK14-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], 
align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 8 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group 
[[ACC_GRP70]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK14-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK14-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10510,14 +11282,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10527,56 +11297,62 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: 
store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK14-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK14-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK14-NEXT: br 
i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK14-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -10584,9 +11360,9 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK14-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11061,23 +11837,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11087,79 +11868,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// 
CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11172,15 +11970,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11193,87 +11987,91 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -11291,23 +12089,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // 
CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11317,79 +12120,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // 
CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11402,15 +12222,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11423,87 +12239,91 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: 
omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -11523,7 +12343,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // 
CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -11532,22 +12352,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11557,109 +12381,127 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], 
ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 [[TMP7]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], 
[[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK17-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK17-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK17-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -11672,16 +12514,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11694,88 +12532,94 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -11793,23 +12637,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 
// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11819,79 +12668,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: 
store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11904,15 +12770,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11925,70 +12787,74 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11996,12 +12862,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -12021,7 +12887,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -12030,22 +12896,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12055,84 +12925,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// 
CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]), !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK17-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -12145,16 +13033,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: 
[[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12167,72 +13051,78 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12240,12 +13130,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -12522,73 +13412,89 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12597,13 +13503,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12613,63 +13517,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// 
CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12681,73 +13589,89 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 
// CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12756,13 +13680,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12772,63 +13694,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// 
CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12842,86 +13768,102 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12930,14 +13872,12 @@ // // // CHECK17-LABEL: 
define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12947,84 +13887,90 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13036,73 +13982,89 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13111,13 +14073,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13127,51 +14087,55 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 
// CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -13179,9 +14143,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13195,86 +14159,102 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13283,14 +14263,12 @@ // // // CHECK17-LABEL: 
define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13300,53 +14278,59 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -13354,9 +14338,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13831,23 +14815,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13857,79 +14846,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13942,15 +14948,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13963,87 +14965,91 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14061,23 +15067,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // 
CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14087,79 +15098,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // 
CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14172,15 +15200,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14193,87 +15217,91 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: 
omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14293,7 +15321,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // 
CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -14302,22 +15330,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14327,109 +15359,127 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], 
ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 [[TMP7]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], 
[[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK19-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK19-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -14442,16 +15492,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // 
CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14464,88 +15510,94 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK19-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -14563,23 +15615,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 
// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14589,79 +15646,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: 
store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14674,15 +15748,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14695,70 +15765,74 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14766,12 +15840,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -14791,7 +15865,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -14800,22 +15874,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], 
ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14825,84 +15903,102 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 
-// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]), !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -14915,16 +16011,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: 
[[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14937,72 +16029,78 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15010,12 +16108,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -15292,73 +16390,89 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca 
ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15367,13 +16481,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15383,63 +16495,67 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// 
CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15451,73 +16567,89 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 
// CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15526,13 +16658,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15542,63 +16672,67 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// 
CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15612,86 +16746,102 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15700,14 +16850,12 @@ // // // CHECK19-LABEL: 
define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15717,84 +16865,90 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15806,73 +16960,89 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15881,13 +17051,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15897,51 +17065,55 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15949,9 +17121,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15965,86 +17137,102 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -16053,14 +17241,12 @@ // // // CHECK19-LABEL: 
define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -16070,53 +17256,59 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -16124,9 +17316,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_private_codegen.cpp @@ -282,15 +282,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -305,6 +307,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -320,68 +324,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: 
[[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -486,15 +490,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -510,6 +516,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -527,65 +535,65 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// 
CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -817,15 +825,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -840,6 +850,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -855,66 +867,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1019,15 +1031,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1043,6 +1057,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1060,63 +1076,63 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // 
CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1316,17 +1332,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1342,63 +1360,65 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4 // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp @@ -145,18 +145,21 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // 
CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -167,74 +170,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: 
cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label 
[[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -324,18 +329,21 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -346,74 +354,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -509,18 +519,21 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -531,74 +544,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // 
CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -688,18 +703,21 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -710,74 +728,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -824,18 +844,21 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -847,77 +870,79 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP17]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -403,6 +403,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -413,112 +414,118 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -531,20 +538,24 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -557,80 +568,82 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = 
sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -841,6 +854,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -851,111 +865,117 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -968,20 +988,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -994,79 +1018,81 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i32 0, i32 
[[TMP19]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1459,23 +1485,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1488,82 +1519,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: 
store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// 
CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1688,23 +1721,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // 
CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1717,81 +1755,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], 
align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2056,18 +2096,21 @@ // CHECK17-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2077,62 +2120,64 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP9:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store 
i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 123, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2241,18 +2286,21 @@ // CHECK19-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2262,61 +2310,63 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP9:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP10]] +// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP11:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] // CHECK19-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 123, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2426,7 +2476,7 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 @@ -2434,22 +2484,24 @@ // CHECK21-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK21-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP3]]) +// CHECK21-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2459,99 +2511,105 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP10:%.*]] = load float, ptr [[B]], align 4, !nontemporal !5, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[CONV:%.*]] = fptosi float [[TMP10]] to i32 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load float, ptr [[B]], align 4, !nontemporal !5, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[CONV:%.*]] = fptosi float [[TMP14]] to i32 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 
0, i64 [[IDXPROM]] // CHECK21-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK21-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] -// CHECK21: omp.inner.for.cond3: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END16:%.*]] -// CHECK21: omp.inner.for.body5: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK21-NEXT: store i32 [[ADD7]], ptr [[I]], align 4 -// CHECK21-NEXT: [[B8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP16:%.*]] = load float, ptr [[B8]], align 4 -// CHECK21-NEXT: [[CONV9:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK21-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK21-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [123 x i32], ptr [[A10]], i64 0, i64 [[IDXPROM11]] -// CHECK21-NEXT: store i32 [[CONV9]], ptr [[ARRAYIDX12]], align 4 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE13:%.*]] -// CHECK21: omp.body.continue13: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC14:%.*]] -// CHECK21: omp.inner.for.inc14: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK21-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP9:![0-9]+]] -// CHECK21: omp.inner.for.end16: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]] +// CHECK21: omp.inner.for.cond4: +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END17:%.*]] +// CHECK21: omp.inner.for.body6: +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK21-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK21-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 
+// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr [[B9]], align 4 +// CHECK21-NEXT: [[CONV10:%.*]] = fptosi float [[TMP20]] to i32 +// CHECK21-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [123 x i32], ptr [[A11]], i64 0, i64 [[IDXPROM12]] +// CHECK21-NEXT: store i32 [[CONV10]], ptr [[ARRAYIDX13]], align 4 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] +// CHECK21: omp.body.continue14: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] +// CHECK21: omp.inner.for.inc15: +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK21-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK21: omp.inner.for.end17: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK21-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 123, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2661,7 +2719,7 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 @@ -2669,22 +2727,24 @@ // CHECK23-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK23-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, 
...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP3]]) +// CHECK23-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2694,97 +2754,103 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP10:%.*]] = load float, ptr [[B]], align 4, !nontemporal !6, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[CONV:%.*]] = fptosi float [[TMP10]] to i32 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load float, ptr [[B]], align 4, !nontemporal !6, 
!llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[CONV:%.*]] = fptosi float [[TMP14]] to i32 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] // CHECK23-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK23-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] -// CHECK23: omp.inner.for.cond3: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END15:%.*]] -// CHECK23: omp.inner.for.body5: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK23-NEXT: store i32 [[ADD7]], ptr [[I]], align 4 -// CHECK23-NEXT: [[B8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP16:%.*]] = load float, ptr [[B8]], align 4 -// CHECK23-NEXT: [[CONV9:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK23-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [123 x i32], ptr [[A10]], i32 0, i32 [[TMP17]] -// CHECK23-NEXT: store i32 [[CONV9]], ptr [[ARRAYIDX11]], align 4 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] -// CHECK23: omp.body.continue12: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] -// CHECK23: omp.inner.for.inc13: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK23-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK23: omp.inner.for.end15: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]] +// CHECK23: omp.inner.for.cond4: +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END16:%.*]] +// CHECK23: omp.inner.for.body6: +// CHECK23-NEXT: [[TMP19:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK23-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK23-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK23-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[B9]], align 4 +// CHECK23-NEXT: [[CONV10:%.*]] = fptosi float [[TMP20]] to i32 +// CHECK23-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [123 x i32], ptr [[A11]], i32 0, i32 [[TMP21]] +// CHECK23-NEXT: store i32 [[CONV10]], ptr [[ARRAYIDX12]], align 4 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE13:%.*]] +// CHECK23: omp.body.continue13: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC14:%.*]] +// CHECK23: omp.inner.for.inc14: +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK23-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK23: omp.inner.for.end16: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK23-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 123, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3226,23 +3292,28 @@ // CHECK33-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK33-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK33-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK33-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK33-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK33-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK33-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3255,82 +3326,84 @@ // CHECK33-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK33-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK33-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK33-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK33-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK33-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK33-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK33-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK33-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK33-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK33-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK33-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK33-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK33-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 
// CHECK33: omp.precond.then: // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK33-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK33-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK33-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK33-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK33-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK33: cond.true: -// CHECK33-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK33-NEXT: br label [[COND_END:%.*]] // CHECK33: cond.false: -// CHECK33-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: br label [[COND_END]] // CHECK33: cond.end: -// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK33-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK33-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK33-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK33-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK33-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK33-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK33-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK33-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK33-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK33-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK33-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK33: omp.body.continue: // CHECK33-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK33-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK33-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK33-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: -// CHECK33-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK33-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK33-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK33-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK33-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK33-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK33-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK33: .omp.final.then: -// CHECK33-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK33-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK33-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK33-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK33-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3429,6 +3502,7 @@ // CHECK33-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK33-NEXT: store i64 [[TE]], 
ptr [[TE_ADDR]], align 8 // CHECK33-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -3437,16 +3511,18 @@ // CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3456,58 +3532,60 @@ // CHECK33-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK33-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK33-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK33-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK33: cond.true: // CHECK33-NEXT: br label [[COND_END:%.*]] // CHECK33: cond.false: -// CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: br label [[COND_END]] // CHECK33: cond.end: -// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK33-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK33-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK33-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK33-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK33-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK33-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK33: omp.body.continue: // CHECK33-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK33-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK33-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: -// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP2]]) -// CHECK33-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK33-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK33-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK33-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK33-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK33: .omp.final.then: // CHECK33-NEXT: store i32 10, ptr [[I]], align 4 // CHECK33-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3636,23 +3714,28 @@ // CHECK35-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK35-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK35-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK35-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK35-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK35-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK35-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK35-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3665,81 +3748,83 @@ // CHECK35-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK35-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK35-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK35-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK35-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK35-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK35-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK35-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK35-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK35-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK35-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK35-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK35-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK35: omp.precond.then: // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK35-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK35-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK35-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK35-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK35-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK35-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK35: cond.true: -// CHECK35-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK35-NEXT: br label [[COND_END:%.*]] // CHECK35: cond.false: -// CHECK35-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK35-NEXT: br label [[COND_END]] // CHECK35: cond.end: -// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK35-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK35-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK35-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK35-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK35-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK35-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK35-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK35-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK35-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK35-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK35-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK35-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: -// CHECK35-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK35-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK35-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK35-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK35-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK35-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK35-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK35: .omp.final.then: -// CHECK35-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK35-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK35-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK35-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK35-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3838,6 +3923,7 @@ // CHECK35-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK35-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK35-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -3846,16 +3932,18 @@ // CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK35-NEXT: call void 
@__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3865,57 +3953,59 @@ // CHECK35-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK35-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK35-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK35-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK35-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK35-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK35: cond.true: // CHECK35-NEXT: br label [[COND_END:%.*]] // CHECK35: cond.false: -// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
// CHECK35-NEXT: br label [[COND_END]] // CHECK35: cond.end: -// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK35-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK35-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK35-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK35-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK35-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK35-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK35-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: -// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK35-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK35-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK35-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK35-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK35-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK35: .omp.final.then: // CHECK35-NEXT: store i32 10, ptr [[I]], align 4 // CHECK35-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4056,7 +4146,7 @@ // CHECK37-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK37-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK37-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK37-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK37-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK37-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4067,24 +4157,28 @@ // CHECK37-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK37-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK37-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK37-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK37-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK37-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK37-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK37-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK37-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK37-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK37-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK37-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK37-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK37-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK37-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK37-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK37-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP6]], align 8 +// CHECK37-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK37-NEXT: ret void // // // CHECK37-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK37-NEXT: entry: // CHECK37-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK37-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK37-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK37-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4097,119 +4191,125 @@ // CHECK37-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK37-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK37-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK37-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK37-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK37-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK37-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK37-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK37-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK37-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK37-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK37-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK37-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK37-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK37-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK37-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK37-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK37-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK37-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK37-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK37-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK37-NEXT: store i32 
[[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK37-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK37-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK37-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK37: omp.precond.then: // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK37-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK37-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK37-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK37-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK37-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK37: cond.true: -// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK37-NEXT: br label [[COND_END:%.*]] // CHECK37: cond.false: -// CHECK37-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: br label [[COND_END]] // CHECK37: cond.end: -// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK37-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK37-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK37-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK37-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK37-NEXT: br i1 
[[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK37: omp_if.then: // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK37-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK37-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK37-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK37-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK37-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK37-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK37-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK37-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK37-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK37-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_IF_END:%.*]] // CHECK37: omp_if.else: -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] -// CHECK37: omp.inner.for.cond8: -// CHECK37-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK37-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END18:%.*]] -// CHECK37: omp.inner.for.body10: -// CHECK37-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK37-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK37-NEXT: store 
i32 [[ADD12]], ptr [[I4]], align 4 -// CHECK37-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK37-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK37-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM13]] -// CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX14]], align 4 -// CHECK37-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] -// CHECK37: omp.body.continue15: -// CHECK37-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] -// CHECK37: omp.inner.for.inc16: -// CHECK37-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK37-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP9:![0-9]+]] -// CHECK37: omp.inner.for.end18: +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] +// CHECK37: omp.inner.for.cond9: +// CHECK37-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK37-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END19:%.*]] +// CHECK37: omp.inner.for.body11: +// CHECK37-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK37-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK37-NEXT: store i32 [[ADD13]], ptr [[I4]], align 4 +// CHECK37-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK37-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK37-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM14]] +// CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX15]], align 4 +// CHECK37-NEXT: br label [[OMP_BODY_CONTINUE16:%.*]] +// CHECK37: omp.body.continue16: +// CHECK37-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] +// CHECK37: omp.inner.for.inc17: +// CHECK37-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK37-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK37: omp.inner.for.end19: // CHECK37-NEXT: br label [[OMP_IF_END]] // CHECK37: omp_if.end: // CHECK37-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK37: omp.loop.exit: -// CHECK37-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK37-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK37-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK37-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK37-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK37-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK37-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK37: .omp.final.then: -// CHECK37-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK37-NEXT: [[DIV20:%.*]] = sdiv i32 
[[SUB19]], 1 -// CHECK37-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK37-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK37-NEXT: store i32 [[ADD22]], ptr [[I4]], align 4 +// CHECK37-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK37-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK37-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK37-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK37-NEXT: store i32 [[ADD23]], ptr [[I4]], align 4 // CHECK37-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK37: .omp.final.done: // CHECK37-NEXT: br label [[OMP_PRECOND_END]] @@ -4304,6 +4404,7 @@ // CHECK37-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK37-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK37-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK37-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK37-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -4312,16 +4413,18 @@ // CHECK37-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK37-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK37-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK37-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK37-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK37-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK37-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK37-NEXT: ret void // // // CHECK37-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK37-NEXT: entry: // CHECK37-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK37-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4331,58 +4434,60 @@ // CHECK37-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK37-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK37-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK37-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK37-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK37-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK37-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK37-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK37-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK37-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK37: cond.true: // CHECK37-NEXT: br label [[COND_END:%.*]] // CHECK37: cond.false: -// CHECK37-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: br label [[COND_END]] // CHECK37: cond.end: -// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK37-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK37-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK37-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK37-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK37-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK37-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK37-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK37-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK37-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK37: omp.loop.exit: -// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK37-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK37-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK37-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK37-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK37: .omp.final.then: // CHECK37-NEXT: store i32 10, ptr [[I]], align 4 // CHECK37-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ 
-4523,7 +4628,7 @@ // CHECK39-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK39-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK39-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK39-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK39-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK39-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4534,24 +4639,28 @@ // CHECK39-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK39-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK39-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK39-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK39-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK39-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK39-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK39-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK39-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK39-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK39-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK39-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK39-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK39-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK39-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK39-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK39-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP6]], align 4 +// CHECK39-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK39-NEXT: ret void // // // CHECK39-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK39-NEXT: entry: // CHECK39-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK39-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK39-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK39-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4564,117 +4673,123 @@ // CHECK39-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK39-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK39-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK39-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK39-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK39-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK39-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK39-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK39-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK39-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK39-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK39-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK39-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK39-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK39-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK39-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK39-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK39-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 4 +// CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK39-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK39-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK39-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK39-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK39-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK39-NEXT: store i32 
[[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK39-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK39-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK39-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK39: omp.precond.then: // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK39-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK39-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK39-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK39-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK39-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK39: cond.true: -// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK39-NEXT: br label [[COND_END:%.*]] // CHECK39: cond.false: -// CHECK39-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: br label [[COND_END]] // CHECK39: cond.end: -// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK39-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK39-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK39-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK39-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK39-NEXT: br i1 
[[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK39: omp_if.then: // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK39-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK39-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK39-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK39-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK39-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK39-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK39-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP18]] +// CHECK39-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP24]] // CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK39-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK39-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label [[OMP_IF_END:%.*]] // CHECK39: omp_if.else: -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] -// CHECK39: omp.inner.for.cond8: -// CHECK39-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK39-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK39: omp.inner.for.body10: -// CHECK39-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK39-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK39-NEXT: store i32 [[ADD12]], ptr [[I4]], align 4 -// CHECK39-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK39-NEXT: 
[[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP23]] -// CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX13]], align 4 -// CHECK39-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK39: omp.body.continue14: -// CHECK39-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK39: omp.inner.for.inc15: -// CHECK39-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK39-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK39: omp.inner.for.end17: +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] +// CHECK39: omp.inner.for.cond9: +// CHECK39-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK39-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK39: omp.inner.for.body11: +// CHECK39-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK39-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK39-NEXT: store i32 [[ADD13]], ptr [[I4]], align 4 +// CHECK39-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK39-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP29]] +// CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX14]], align 4 +// CHECK39-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK39: omp.body.continue15: +// CHECK39-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK39: omp.inner.for.inc16: +// CHECK39-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK39-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK39: omp.inner.for.end18: // CHECK39-NEXT: br label [[OMP_IF_END]] // CHECK39: omp_if.end: // CHECK39-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK39: omp.loop.exit: -// CHECK39-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK39-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK39-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK39-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK39-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK39-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK39-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK39: .omp.final.then: -// CHECK39-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK39-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// CHECK39-NEXT: [[MUL20:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK39-NEXT: [[ADD21:%.*]] = add nsw i32 0, [[MUL20]] -// CHECK39-NEXT: store i32 [[ADD21]], ptr [[I4]], align 4 +// CHECK39-NEXT: [[TMP35:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK39-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK39-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 +// CHECK39-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] +// CHECK39-NEXT: store i32 [[ADD22]], ptr [[I4]], align 4 // CHECK39-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK39: .omp.final.done: // CHECK39-NEXT: br label [[OMP_PRECOND_END]] @@ -4769,6 +4884,7 @@ // CHECK39-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK39-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK39-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK39-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK39-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -4777,16 +4893,18 @@ // CHECK39-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK39-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK39-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK39-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK39-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK39-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK39-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK39-NEXT: ret void // // // CHECK39-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK39-NEXT: entry: // CHECK39-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK39-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4796,57 +4914,59 @@ // CHECK39-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK39-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK39-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK39-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK39-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK39-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK39-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK39-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK39-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK39-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK39-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK39: cond.true: // CHECK39-NEXT: br label [[COND_END:%.*]] // CHECK39: cond.false: -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: br label [[COND_END]] // CHECK39: cond.end: -// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK39-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK39-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK39-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK39-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK39-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK39-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK39-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK39-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK39: omp.loop.exit: -// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK39-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK39-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK39-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK39: .omp.final.then: // CHECK39-NEXT: store i32 10, ptr [[I]], align 4 // CHECK39-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp @@ -171,18 +171,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -194,71 +197,73 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: 
omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw 
i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -347,18 +352,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -370,69 +378,71 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: 
omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -737,6 +747,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -745,169 +756,177 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: 
[[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// 
CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK9-NEXT: 
[[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 -// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -975,18 +994,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -998,70 +1020,72 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1225,6 +1249,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1233,167 +1258,175 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, 
ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// 
CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: 
[[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 
-// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 -// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] -// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 -// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 -// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] -// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK11-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1 +// CHECK11-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 1 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK11-NEXT: store i32 [[ADD39]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// CHECK11-NEXT: store i32 [[ADD43]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1461,18 +1494,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1484,68 +1520,70 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// 
CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -296,18 +296,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -317,59 +320,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -381,18 +386,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -402,59 +410,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -466,18 +476,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -487,76 +500,78 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -736,18 +751,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -757,58 +775,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi 
i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne 
i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -820,18 +840,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -841,58 +864,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -904,18 +929,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -925,75 +953,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1529,23 +1559,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // 
CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1558,82 +1593,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1651,23 +1688,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1680,82 +1722,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // 
CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// 
CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1774,7 +1818,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 @@ -1782,22 +1826,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1810,101 +1858,105 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt 
i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr 
[[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK9-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK9-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -2067,18 +2119,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2088,58 +2143,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2151,18 +2208,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2172,58 +2232,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
+// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2235,18 +2297,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2256,75 +2321,77 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// 
CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2615,23 +2682,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2644,81 +2716,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2736,23 +2810,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2765,81 +2844,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = 
sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2858,7 +2939,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -2866,22 +2947,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2894,100 +2979,104 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: 
[[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] 
= load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -3150,18 +3239,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3171,57 +3263,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3233,18 +3327,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: 
entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3254,57 +3351,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // 
CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3316,18 +3415,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3337,74 +3439,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -350,8 +350,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], 
ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -360,145 +359,155 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -719,7 +728,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -728,23 +737,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// 
CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -752,119 +765,123 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void 
@_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], 
ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1175,8 +1192,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1185,143 +1201,153 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1542,7 +1568,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1551,23 +1577,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 
-// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1575,117 +1605,121 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// 
CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store 
i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: 
store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2562,35 +2596,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2602,68 +2634,76 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] 
+// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp @@ -163,25 +163,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -189,104 +195,106 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 
8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// 
CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8 +// CHECK1-NEXT: store volatile double [[TMP31]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP33]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -328,6 +336,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -341,20 +350,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -362,104 +376,106 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr 
[[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD4:%.*]] = add 
nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 4 +// CHECK3-NEXT: store volatile double [[TMP31]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP33]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -663,6 +679,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -672,21 +689,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr 
[[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -694,31 +717,33 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: 
[[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -728,106 +753,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// 
CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group 
[[ACC_GRP5]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP22]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], 
label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP32]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], 
align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1013,6 +1038,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1021,20 +1047,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1042,28 +1073,30 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1073,104 +1106,104 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // 
CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store 
i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] 
= icmp eq ptr [[ARRAY_BEGIN11]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: 
arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1380,6 +1413,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1389,21 +1423,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1411,31 +1451,33 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1445,104 +1487,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP22]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: 
omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP32]] +// CHECK11-NEXT: br 
i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1728,6 +1770,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1736,20 +1779,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1757,28 +1805,30 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1788,102 +1838,102 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// 
CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp @@ -283,15 +283,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -306,6 +308,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -321,75 +325,75 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, 
!llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -494,15 +498,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: 
() #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -518,6 +524,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -535,72 +543,72 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -832,15 +840,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -855,6 +865,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -870,73 +882,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// 
CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1041,15 +1053,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1065,6 +1079,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1082,70 +1098,70 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: 
omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -2097,17 +2113,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2123,66 +2141,68 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp @@ -145,18 +145,21 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -167,81 +170,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -331,18 +336,21 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -353,81 +361,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -523,18 +533,21 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -545,81 +558,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -709,18 +724,21 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -731,81 +749,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// 
CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1050,18 +1070,21 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1073,84 +1096,86 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr 
[[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
+// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4
+// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.case2:
-// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4
-// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4
-// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var)
+// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4
+// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4
+// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var)
// CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK9: .omp.reduction.default:
// CHECK9-NEXT: ret void
diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp
--- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp
@@ -172,38 +172,44 @@
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4
// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8
// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8
-// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP3]])
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]])
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK1-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP6]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -230,38 +236,44 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// 
CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP6]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -466,8 +478,7 @@ // CHECK9-NEXT: 
[[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -476,77 +487,87 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done3: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], 
label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done8: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -599,24 +620,29 @@ // CHECK9-SAME: (i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 // CHECK9-NEXT: ret void // // @@ -624,11 +650,11 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8 @@ -642,9 +668,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 signext 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 signext 3) // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 @@ -755,12 +781,12 @@ // CHECK9: omp_offload.cont7: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr 
nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP53]], [[OMP_OFFLOAD_CONT7]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] @@ -883,7 +909,7 @@ // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -891,71 +917,79 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done3: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK9-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done8: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -988,24 +1022,29 @@ // CHECK9-SAME: (i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 // CHECK9-NEXT: ret void // // @@ -1015,7 +1054,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1029,7 +1068,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1047,9 +1086,9 @@ // CHECK9-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK9-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1267,8 +1306,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: 
[[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1277,77 +1315,87 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done3: -// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK11-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done1: +// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK11-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done8: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // @@ -1400,24 +1448,29 @@ // CHECK11-SAME: (i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 // CHECK11-NEXT: ret void // // @@ -1425,11 +1478,11 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4 @@ -1443,9 +1496,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 @@ -1556,12 +1609,12 @@ // CHECK11: omp_offload.cont7: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: 
call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP53]], [[OMP_OFFLOAD_CONT7]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1684,7 +1737,7 @@ // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1692,71 +1745,79 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK11-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 +// CHECK11-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = 
load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done3: -// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i32 4, i1 false) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done1: +// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i32 4, i1 false) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done8: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // @@ -1789,24 +1850,29 @@ // CHECK11-SAME: (i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 // CHECK11-NEXT: ret void // // @@ -1816,7 +1882,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void @@ -1830,7 +1896,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1848,9 +1914,9 @@ // CHECK11-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK11-NEXT: store ptr [[T]], ptr [[T_INDIRECT_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK11-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 
+// CHECK11-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2018,6 +2084,7 @@ // CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2030,61 +2097,72 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined., ptr [[TMP4]], ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[S:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[A:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = call ptr @llvm.stacksave() -// CHECK17-NEXT: store ptr [[TMP5]], ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP6]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i64 [[TMP8]], i1 false) -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP9]], i64 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPg(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP10]], i32 signext [[TMP11]], ptr [[TMP12]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// 
CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP13]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() +// CHECK17-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP15]], align 128 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA]], ptr align 128 [[TMP13]], i64 [[TMP17]], i1 false) +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP18]], i64 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPg(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP19]], i32 signext [[TMP20]], ptr [[TMP21]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP22]]) // CHECK17-NEXT: ret void // // @@ -2256,6 +2334,7 @@ // CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 @@ -2269,74 +2348,85 @@ // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 // CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined..1, i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP4]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]], ptr [[N_ADDR]], ptr [[TMP6]]) +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[THIS:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr [[S:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave() -// CHECK17-NEXT: store ptr [[TMP6]], ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP7]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i64 [[TMP9]], i1 false) -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[A]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] 
= sitofp i32 [[TMP10]] to double -// CHECK17-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP3]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA7]], i64 [[TMP11]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() +// CHECK17-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP16]], align 128 +// CHECK17-NEXT: store i64 [[TMP7]], ptr [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA]], ptr align 128 [[TMP11]], i64 [[TMP18]], i1 false) +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 0 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[A]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK17-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP9]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[TMP20]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 1 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB]] to i64 -// CHECK17-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK17-NEXT: store double [[CONV]], ptr [[ARRAYIDX8]], align 8 -// CHECK17-NEXT: [[CONV9:%.*]] = fpext double [[CONV]] to ppc_fp128 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[B10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[B10]], align 4 -// CHECK17-NEXT: [[IDXPROM11:%.*]] = 
sext i32 [[TMP14]] to i64 -// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds ppc_fp128, ptr [[TMP13]], i64 [[IDXPROM11]] -// CHECK17-NEXT: store ppc_fp128 [[CONV9]], ptr [[ARRAYIDX12]], align 16 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP15]]) +// CHECK17-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK17-NEXT: store double [[CONV]], ptr [[ARRAYIDX1]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to ppc_fp128 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK17-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds ppc_fp128, ptr [[TMP22]], i64 [[IDXPROM4]] +// CHECK17-NEXT: store ppc_fp128 [[CONV2]], ptr [[ARRAYIDX5]], align 16 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) // CHECK17-NEXT: ret void // // @@ -2488,6 +2578,7 @@ // CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2500,61 +2591,72 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR3]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined., ptr [[TMP4]], ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[S:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], i32 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[A:%.*]], i32 [[VLA2:%.*]], i32 [[VLA4:%.*]], ptr nonnull align 4 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA2]], ptr [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: store i32 [[VLA4]], ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = call ptr @llvm.stacksave() -// CHECK19-NEXT: store ptr [[TMP5]], ptr [[SAVED_STACK]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[VLA7:%.*]] = alloca double, i32 [[TMP6]], align 128 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[__VLA_EXPR1]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8 -// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i32 [[TMP8]], i1 false) -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP9]], i32 0 -// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP10]], i32 [[TMP11]], ptr [[TMP12]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// 
CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP13]]) +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() +// CHECK19-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] +// CHECK19-NEXT: [[VLA:%.*]] = alloca double, i32 [[TMP15]], align 128 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[__VLA_EXPR0]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[__VLA_EXPR1]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 8 +// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA]], ptr align 128 [[TMP13]], i32 [[TMP17]], i1 false) +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP18]], i32 0 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP19]], i32 [[TMP20]], ptr [[TMP21]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP22]]) // CHECK19-NEXT: ret void // // @@ -2724,6 +2826,7 @@ // CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 4 @@ -2737,72 +2840,83 @@ // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined..1, i32 [[TMP0]], ptr [[TMP5]], ptr [[TMP4]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]], ptr [[N_ADDR]], ptr [[TMP6]]) +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[S_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[THIS:%.*]], i32 [[VLA2:%.*]], i32 [[VLA4:%.*]], ptr nonnull align 4 dereferenceable(8) [[VLA26:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr [[S:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA2]], ptr [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: store i32 [[VLA4]], ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave() -// CHECK19-NEXT: store ptr [[TMP6]], ptr [[SAVED_STACK]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[VLA7:%.*]] = alloca double, i32 [[TMP7]], align 128 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[__VLA_EXPR1]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 8 -// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i32 [[TMP9]], i1 false) -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP1]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 0 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[A]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] 
= sitofp i32 [[TMP10]] to double -// CHECK19-NEXT: [[TMP11:%.*]] = mul nsw i32 1, [[TMP3]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA7]], i32 [[TMP11]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i32 [[SUB]] -// CHECK19-NEXT: store double [[CONV]], ptr [[ARRAYIDX8]], align 8 -// CHECK19-NEXT: [[CONV9:%.*]] = fpext double [[CONV]] to x86_fp80 -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[B10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[B10]], align 4 -// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP13]], i32 [[TMP14]] -// CHECK19-NEXT: store x86_fp80 [[CONV9]], ptr [[ARRAYIDX11]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP15]]) +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() +// CHECK19-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP7]], [[TMP9]] +// CHECK19-NEXT: [[VLA:%.*]] = alloca double, i32 [[TMP16]], align 128 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[__VLA_EXPR0]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[__VLA_EXPR1]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP7]], [[TMP9]] +// CHECK19-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA]], ptr align 128 [[TMP11]], i32 [[TMP18]], i1 false) +// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP5]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 0 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[A]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK19-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP9]] +// 
CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i32 [[SUB]] +// CHECK19-NEXT: store double [[CONV]], ptr [[ARRAYIDX1]], align 8 +// CHECK19-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to x86_fp80 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: store x86_fp80 [[CONV2]], ptr [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) // CHECK19-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_private_codegen.cpp b/clang/test/OpenMP/teams_private_codegen.cpp --- a/clang/test/OpenMP/teams_private_codegen.cpp +++ b/clang/test/OpenMP/teams_private_codegen.cpp @@ -258,18 +258,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -278,20 +281,22 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[B]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK1-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -325,26 +330,30 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l117 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 1, ptr [[G]], align 128 // CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP2]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -451,18 +460,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -471,20 +483,22 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -518,26 +532,30 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l117 // CHECK3-SAME: () #[[ATTR3]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 128 // CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP2]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -660,15 +678,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -676,6 +696,8 @@ // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -687,18 +709,18 @@ // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK9-NEXT: store i32 3, ptr [[SIVAR]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -721,20 +743,20 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca 
[[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef signext 3) // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 @@ -772,12 +794,12 @@ // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -857,18 +879,21 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -876,21 +901,23 @@ // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK9-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK9-NEXT: ret void // // @@ -964,44 +991,48 @@ // CHECK9-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 128 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], 
ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX1]], ptr align 128 [[VAR]], i64 4, i1 false) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1025,7 +1056,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1090,29 +1121,34 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 // CHECK9-NEXT: ret void // // @@ -1124,7 +1160,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1257,15 +1293,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1273,6 +1311,8 @@ // CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -1284,18 +1324,18 @@ // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i32 4, i1 false) // CHECK11-NEXT: store i32 3, ptr [[SIVAR]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1318,20 +1358,20 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], 
align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 @@ -1369,12 +1409,12 @@ // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1454,18 +1494,21 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca 
ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1473,21 +1516,23 @@ // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK11-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 
[[DIV]], ptr [[TMP6]], align 4 // CHECK11-NEXT: ret void // // @@ -1561,44 +1606,48 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 128 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 
0, i32 0 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX1]], ptr align 128 [[VAR]], i32 4, i1 false) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1622,7 +1671,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1687,29 +1736,34 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 // CHECK11-NEXT: ret void // // @@ -1721,7 +1775,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp --- a/clang/test/OpenMP/tile_codegen.cpp +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -880,15 +880,17 @@ // CHECK1-LABEL: define {{[^@]+}}@foo6 // CHECK1-SAME: () #[[ATTR0]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -900,68 +902,70 @@ // CHECK1-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 7, ptr [[I]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 0 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 5 // CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] // CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] // CHECK1: cond.true4: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false5: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 5 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], [[COND8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP13]], 3 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] // CHECK1-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) 
@body(i32 noundef [[TMP14]]) // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: for.end: @@ -969,14 +973,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -1874,15 +1878,17 @@ // CHECK2-LABEL: define {{[^@]+}}@foo6 // CHECK2-SAME: () #[[ATTR2]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -1894,68 +1900,70 @@ // CHECK2-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK2-NEXT: store i32 7, ptr [[I]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 0 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 5 // CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] // CHECK2-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] // CHECK2: cond.true4: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false5: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 5 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] -// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], [[COND8]] // CHECK2-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP13]], 3 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] // CHECK2-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) 
@body(i32 noundef [[TMP14]]) // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: for.end: @@ -1963,14 +1971,14 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // diff --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp --- a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp +++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 2 // Check code generation // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR @@ -13,17 +14,6 @@ extern "C" void body(...) {} -// IR-LABEL: @func( -// IR-NEXT: [[ENTRY:.*]]: -// IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4 -// IR-NEXT: store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4 -// IR-NEXT: store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4 -// IR-NEXT: store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4 -// IR-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @2, i32 3, ptr @.omp_outlined., ptr %[[END_ADDR]], ptr %[[STEP_ADDR]], ptr %[[START_ADDR]]) -// IR-NEXT: ret void -// IR-NEXT: } extern "C" void func(int start, int end, int step) { #pragma omp parallel for #pragma omp unroll partial(7) @@ -32,179 +22,186 @@ } -// IR-LABEL: @.omp_outlined.( -// IR-NEXT: [[ENTRY:.*]]: -// IR-NEXT: %[[DOTGLOBAL_TID__ADDR:.+]] = alloca ptr, align 8 -// IR-NEXT: %[[DOTBOUND_TID__ADDR:.+]] = alloca ptr, align 8 -// IR-NEXT: %[[END_ADDR:.+]] = alloca ptr, align 8 -// IR-NEXT: %[[STEP_ADDR:.+]] = alloca ptr, align 8 -// IR-NEXT: %[[START_ADDR:.+]] = alloca ptr, align 8 -// IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4 -// IR-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// IR-NEXT: %[[I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTUNROLLED_IV_I12:.+]] = alloca i32, align 4 -// IR-NEXT: %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4 -// IR-NEXT: store ptr %[[DOTGLOBAL_TID_:.+]], ptr %[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: store ptr %[[DOTBOUND_TID_:.+]], ptr %[[DOTBOUND_TID__ADDR]], align 8 -// IR-NEXT: store ptr %[[END:.+]], ptr %[[END_ADDR]], align 8 -// IR-NEXT: store ptr %[[STEP:.+]], ptr %[[STEP_ADDR]], align 8 -// IR-NEXT: store ptr %[[START:.+]], ptr %[[START_ADDR]], align 8 -// IR-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[END_ADDR]], align 8 -// IR-NEXT: %[[TMP1:.+]] = load ptr, ptr %[[STEP_ADDR]], align 8 -// IR-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[START_ADDR]], align 8 -// IR-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// IR-NEXT: store i32 %[[TMP3]], ptr %[[I]], align 4 -// IR-NEXT: %[[TMP4:.+]] = load i32, ptr %[[TMP2]], align 4 -// IR-NEXT: store i32 %[[TMP4]], ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP5:.+]] = load i32, ptr %[[TMP0]], align 4 -// IR-NEXT: store i32 %[[TMP5]], ptr %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: %[[TMP6:.+]] = load i32, ptr %[[TMP1]], align 4 -// IR-NEXT: store i32 %[[TMP6]], ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_1]], align 4 -// IR-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP7]], %[[TMP8]] -// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1 -// IR-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP9]] -// IR-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP10]] -// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1 -// IR-NEXT: store i32 %[[SUB5]], ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP11]], 1 -// IR-NEXT: store i32 %[[ADD7]], ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[SUB9:.+]] = sub i32 
%[[TMP12]], -6 -// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 7 -// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1 -// IR-NEXT: store i32 %[[SUB11]], ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: store i32 0, ptr %[[DOTUNROLLED_IV_I]], align 4 -// IR-NEXT: %[[TMP13:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4 -// IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]] -// IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_THEN]]: -// IR-NEXT: store i32 0, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: store i32 %[[TMP14]], ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: store i32 1, ptr %[[DOTOMP_STRIDE]], align 4 -// IR-NEXT: store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4 -// IR-NEXT: %[[TMP15:.+]] = load ptr, ptr %[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: %[[TMP16:.+]] = load i32, ptr %[[TMP15]], align 4 -// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[TMP16]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1) -// IR-NEXT: %[[TMP17:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP18:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: %[[CMP13:.+]] = icmp ugt i32 %[[TMP17]], %[[TMP18]] -// IR-NEXT: br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_TRUE]]: -// IR-NEXT: %[[TMP19:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4 -// IR-NEXT: br label %[[COND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[COND_FALSE]]: -// IR-NEXT: %[[TMP20:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: br label %[[COND_END]] -// IR-EMPTY: -// IR-NEXT: [[COND_END]]: -// IR-NEXT: %[[COND:.+]] = phi i32 [ %[[TMP19]], %[[COND_TRUE]] ], [ %[[TMP20]], %[[COND_FALSE]] ] -// IR-NEXT: store i32 %[[COND]], ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[TMP21:.+]] = load i32, ptr %[[DOTOMP_LB]], align 4 -// IR-NEXT: store i32 %[[TMP21]], ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_COND]]: -// IR-NEXT: %[[TMP22:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[TMP23:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4 -// IR-NEXT: %[[ADD14:.+]] = add i32 %[[TMP23]], 1 -// IR-NEXT: %[[CMP15:.+]] = icmp ult i32 %[[TMP22]], %[[ADD14]] -// IR-NEXT: br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_BODY]]: -// IR-NEXT: %[[TMP24:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP24]], 7 -// IR-NEXT: %[[ADD16:.+]] = add i32 0, %[[MUL]] -// IR-NEXT: store i32 %[[ADD16]], ptr %[[DOTUNROLLED_IV_I12]], align 4 -// IR-NEXT: %[[TMP25:.+]] = load i32, ptr %[[DOTUNROLLED_IV_I12]], align 4 -// IR-NEXT: store i32 %[[TMP25]], ptr %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_COND]]: -// IR-NEXT: %[[TMP26:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[TMP27:.+]] = load i32, ptr %[[DOTUNROLLED_IV_I12]], align 4 -// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP27]], 7 -// IR-NEXT: %[[CMP18:.+]] = icmp ult i32 %[[TMP26]], %[[ADD17]] -// IR-NEXT: br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[LAND_RHS]]: -// IR-NEXT: %[[TMP28:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[TMP29:.+]] = load i32, ptr 
%[[DOTCAPTURE_EXPR_3]], align 4 -// IR-NEXT: %[[ADD19:.+]] = add i32 %[[TMP29]], 1 -// IR-NEXT: %[[CMP20:.+]] = icmp ult i32 %[[TMP28]], %[[ADD19]] -// IR-NEXT: br label %[[LAND_END]] -// IR-EMPTY: -// IR-NEXT: [[LAND_END]]: -// IR-NEXT: %[[TMP30:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP20]], %[[LAND_RHS]] ] -// IR-NEXT: br i1 %[[TMP30]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_BODY]]: -// IR-NEXT: %[[TMP31:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// IR-NEXT: %[[TMP32:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[TMP33:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4 -// IR-NEXT: %[[MUL21:.+]] = mul i32 %[[TMP32]], %[[TMP33]] -// IR-NEXT: %[[ADD22:.+]] = add i32 %[[TMP31]], %[[MUL21]] -// IR-NEXT: store i32 %[[ADD22]], ptr %[[I]], align 4 -// IR-NEXT: %[[TMP34:.+]] = load i32, ptr %[[TMP2]], align 4 -// IR-NEXT: %[[TMP35:.+]] = load i32, ptr %[[TMP0]], align 4 -// IR-NEXT: %[[TMP36:.+]] = load i32, ptr %[[TMP1]], align 4 -// IR-NEXT: %[[TMP37:.+]] = load i32, ptr %[[I]], align 4 -// IR-NEXT: call void (...) @body(i32 noundef %[[TMP34]], i32 noundef %[[TMP35]], i32 noundef %[[TMP36]], i32 noundef %[[TMP37]]) -// IR-NEXT: br label %[[FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_INC]]: -// IR-NEXT: %[[TMP38:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: %[[INC:.+]] = add i32 %[[TMP38]], 1 -// IR-NEXT: store i32 %[[INC]], ptr %[[DOTUNROLL_INNER_IV_I]], align 4 -// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]] -// IR-EMPTY: -// IR-NEXT: [[FOR_END]]: -// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_BODY_CONTINUE]]: -// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_INC]]: -// IR-NEXT: %[[TMP39:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: %[[ADD23:.+]] = add i32 %[[TMP39]], 1 -// IR-NEXT: store i32 %[[ADD23]], ptr %[[DOTOMP_IV]], align 4 -// IR-NEXT: br label %[[OMP_INNER_FOR_COND]] -// IR-EMPTY: -// IR-NEXT: [[OMP_INNER_FOR_END]]: -// IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// IR-EMPTY: -// IR-NEXT: [[OMP_LOOP_EXIT]]: -// IR-NEXT: %[[TMP40:.+]] = load ptr, ptr %[[DOTGLOBAL_TID__ADDR]], align 8 -// IR-NEXT: %[[TMP41:.+]] = load i32, ptr %[[TMP40]], align 4 -// IR-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[TMP41]]) -// IR-NEXT: br label %[[OMP_PRECOND_END]] -// IR-EMPTY: -// IR-NEXT: [[OMP_PRECOND_END]]: -// IR-NEXT: ret void -// IR-NEXT: } #endif /* HEADER */ -// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]} -// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"} -// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 7} +// IR-LABEL: define dso_local void @body +// IR-SAME: (...) 
#[[ATTR0:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: ret void +// +// +// IR-LABEL: define dso_local void @func +// IR-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] { +// IR-NEXT: entry: +// IR-NEXT: [[START_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[END_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[STEP_ADDR:%.*]] = alloca i32, align 4 +// IR-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// IR-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 +// IR-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 +// IR-NEXT: store i32 [[STEP]], ptr [[STEP_ADDR]], align 4 +// IR-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// IR-NEXT: store ptr [[END_ADDR]], ptr [[TMP0]], align 8 +// IR-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// IR-NEXT: store ptr [[STEP_ADDR]], ptr [[TMP1]], align 8 +// IR-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// IR-NEXT: store ptr [[START_ADDR]], ptr [[TMP2]], align 8 +// IR-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// IR-NEXT: ret void +// +// +// IR-LABEL: define internal void @.omp_outlined. +// IR-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { +// IR-NEXT: entry: +// IR-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTUNROLLED_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTUNROLLED_IV_I11:%.*]] = alloca i32, align 4 +// IR-NEXT: [[DOTUNROLL_INNER_IV_I:%.*]] = alloca i32, align 4 +// IR-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// IR-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// IR-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// IR-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// IR-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// IR-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// IR-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// IR-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// IR-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// IR-NEXT: store i32 [[TMP7]], ptr [[I]], align 4 +// 
IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4 +// IR-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// IR-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// IR-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// IR-NEXT: store i32 [[TMP10]], ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[SUB:%.*]] = sub i32 [[TMP11]], [[TMP12]] +// IR-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 +// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP13]] +// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP14]] +// IR-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 +// IR-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD6:%.*]] = add i32 [[TMP15]], 1 +// IR-NEXT: store i32 [[ADD6]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[SUB8:%.*]] = sub i32 [[TMP16]], -6 +// IR-NEXT: [[DIV9:%.*]] = udiv i32 [[SUB8]], 7 +// IR-NEXT: [[SUB10:%.*]] = sub i32 [[DIV9]], 1 +// IR-NEXT: store i32 [[SUB10]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTUNROLLED_IV_I]], align 4 +// IR-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// IR-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP17]] +// IR-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// IR: omp.precond.then: +// IR-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 +// IR-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// IR-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// IR-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: [[CMP12:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] +// IR-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// IR: cond.true: +// IR-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// IR-NEXT: br label [[COND_END:%.*]] +// IR: cond.false: +// IR-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: br label [[COND_END]] +// IR: cond.end: +// IR-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] +// IR-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// IR-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// IR: omp.inner.for.cond: +// IR-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// IR-NEXT: [[ADD13:%.*]] = add i32 [[TMP27]], 1 +// IR-NEXT: [[CMP14:%.*]] = icmp ult i32 
[[TMP26]], [[ADD13]] +// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// IR: omp.inner.for.body: +// IR-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[MUL:%.*]] = mul i32 [[TMP28]], 7 +// IR-NEXT: [[ADD15:%.*]] = add i32 0, [[MUL]] +// IR-NEXT: store i32 [[ADD15]], ptr [[DOTUNROLLED_IV_I11]], align 4 +// IR-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTUNROLLED_IV_I11]], align 4 +// IR-NEXT: store i32 [[TMP29]], ptr [[DOTUNROLL_INNER_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND:%.*]] +// IR: for.cond: +// IR-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_I]], align 4 +// IR-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTUNROLLED_IV_I11]], align 4 +// IR-NEXT: [[ADD16:%.*]] = add i32 [[TMP31]], 7 +// IR-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP30]], [[ADD16]] +// IR-NEXT: br i1 [[CMP17]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] +// IR: land.rhs: +// IR-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_I]], align 4 +// IR-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// IR-NEXT: [[ADD18:%.*]] = add i32 [[TMP33]], 1 +// IR-NEXT: [[CMP19:%.*]] = icmp ult i32 [[TMP32]], [[ADD18]] +// IR-NEXT: br label [[LAND_END]] +// IR: land.end: +// IR-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[FOR_COND]] ], [ [[CMP19]], [[LAND_RHS]] ] +// IR-NEXT: br i1 [[TMP34]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// IR: for.body: +// IR-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// IR-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_I]], align 4 +// IR-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 +// IR-NEXT: [[MUL20:%.*]] = mul i32 [[TMP36]], [[TMP37]] +// IR-NEXT: [[ADD21:%.*]] = add i32 [[TMP35]], [[MUL20]] +// IR-NEXT: store i32 [[ADD21]], ptr [[I]], align 4 +// IR-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP6]], align 4 +// IR-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP2]], align 4 +// IR-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP4]], align 4 +// IR-NEXT: [[TMP41:%.*]] = load i32, ptr [[I]], align 4 +// IR-NEXT: call void (...) 
@body(i32 noundef [[TMP38]], i32 noundef [[TMP39]], i32 noundef [[TMP40]], i32 noundef [[TMP41]]) +// IR-NEXT: br label [[FOR_INC:%.*]] +// IR: for.inc: +// IR-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_I]], align 4 +// IR-NEXT: [[INC:%.*]] = add i32 [[TMP42]], 1 +// IR-NEXT: store i32 [[INC]], ptr [[DOTUNROLL_INNER_IV_I]], align 4 +// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// IR: for.end: +// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// IR: omp.body.continue: +// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// IR: omp.inner.for.inc: +// IR-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: [[ADD22:%.*]] = add i32 [[TMP43]], 1 +// IR-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 +// IR-NEXT: br label [[OMP_INNER_FOR_COND]] +// IR: omp.inner.for.end: +// IR-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// IR: omp.loop.exit: +// IR-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// IR-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// IR-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// IR-NEXT: br label [[OMP_PRECOND_END]] +// IR: omp.precond.end: +// IR-NEXT: ret void +// diff --git a/clang/test/OpenMP/vla_crash.c b/clang/test/OpenMP/vla_crash.c --- a/clang/test/OpenMP/vla_crash.c +++ b/clang/test/OpenMP/vla_crash.c @@ -27,6 +27,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -34,51 +35,58 @@ // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @a, align 4 // CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP8]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], ptr [[B]], i64 [[TMP4]], ptr [[C]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @a, align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 [[TMP7]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds ptr, ptr [[ARRAYIDX3]], i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX4]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr @a, align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 0, [[TMP0]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 0 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP9]], i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr @a, align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 [[TMP12]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[ARRAYIDX1]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr @a, align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = mul nsw i64 0, [[TMP2]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 0 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX6]], align 4 // CHECK1-NEXT: ret void // // @@ -88,6 +96,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[P:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -96,32 +105,38 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 // CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[P]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[P]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP5]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], ptr [[P]], ptr [[A_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[P:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[P_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp eq ptr [[TMP3]], [[TMP2]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp eq ptr [[TMP7]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: // CHECK1-NEXT: br label [[IF_END]] diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected --- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected +++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected @@ -34,9 +34,10 @@ // OMP-NEXT: entry: // OMP-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // OMP-NEXT: store i32 0, ptr [[RETVAL]], align 4 // OMP-NEXT: store i32 0, ptr [[I]], align 4 -// OMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// OMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: call void @foo() // OMP-NEXT: ret i32 0 // @@ -45,6 +46,7 @@ // OMP-NEXT: entry: // OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // OMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // OMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // OMP-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -54,61 +56,64 @@ // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 // OMP-NEXT: store ptr [[DOTGLOBAL_TID_:%.*]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // OMP-NEXT: store ptr [[DOTBOUND_TID_:%.*]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP-NEXT: store ptr [[__CONTEXT:%.*]], ptr [[__CONTEXT_ADDR]], align 8 +// OMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // OMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // OMP-NEXT: store i32 33554431, ptr [[DOTOMP_UB]], align 4 // OMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // OMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// OMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// OMP-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 33554431 +// OMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// OMP-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 33554431 // OMP-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // OMP: cond.true: // OMP-NEXT: br label [[COND_END:%.*]] // OMP: cond.false: -// OMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // OMP-NEXT: br label [[COND_END]] // OMP: cond.end: -// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // OMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// OMP-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // OMP: omp.inner.for.cond: -// OMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // OMP-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // OMP: omp.inner.for.body: -// OMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // OMP-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // OMP-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// OMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 -// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// OMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // OMP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], ptr @A, i64 0, i64 [[IDXPROM]] // OMP-NEXT: store double 0.000000e+00, ptr [[ARRAYIDX]], align 8 // OMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // OMP: omp.body.continue: // OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // OMP: omp.inner.for.inc: -// OMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // OMP-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND]] // OMP: omp.inner.for.end: // OMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // OMP: omp.loop.exit: -// OMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // OMP-NEXT: ret void // // // OMP-LABEL: @foo( // OMP-NEXT: entry: // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // OMP-NEXT: store i32 0, ptr [[I]], align 4 -// OMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// OMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: ret void // // @@ -116,6 +121,7 @@ // OMP-NEXT: entry: // OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// OMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // OMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // OMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // OMP-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -125,53 +131,55 @@ // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 // OMP-NEXT: store ptr [[DOTGLOBAL_TID_:%.*]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // OMP-NEXT: store ptr [[DOTBOUND_TID_:%.*]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// OMP-NEXT: store ptr [[__CONTEXT:%.*]], ptr [[__CONTEXT_ADDR]], align 8 +// OMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // OMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // OMP-NEXT: store i32 33554431, ptr [[DOTOMP_UB]], align 4 // OMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // OMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// OMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// OMP-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// OMP-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 33554431 +// OMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// OMP-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 33554431 // OMP-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // OMP: cond.true: // OMP-NEXT: br label [[COND_END:%.*]] // OMP: cond.false: -// OMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // OMP-NEXT: br label [[COND_END]] // OMP: cond.end: -// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // OMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// OMP-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// OMP-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // OMP: omp.inner.for.cond: -// OMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // OMP-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // OMP: omp.inner.for.body: -// OMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 
1 +// OMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // OMP-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // OMP-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// OMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 -// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// OMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // OMP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], ptr @A, i64 0, i64 [[IDXPROM]] // OMP-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX]], align 8 // OMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // OMP: omp.body.continue: // OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // OMP: omp.inner.for.inc: -// OMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // OMP-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND]] // OMP: omp.inner.for.end: // OMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // OMP: omp.loop.exit: -// OMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// OMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // OMP-NEXT: ret void // // diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected --- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected +++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected @@ -13,9 +13,10 @@ // OMP-NEXT: entry: // OMP-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // OMP-NEXT: store i32 0, ptr [[RETVAL]], align 4 // OMP-NEXT: store i32 0, ptr [[I]], align 4 -// OMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// OMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: call void @foo() // OMP-NEXT: ret i32 0 // @@ -62,8 +63,9 @@ // OMP-LABEL: @foo( // OMP-NEXT: entry: // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // OMP-NEXT: store i32 0, ptr [[I]], align 4 -// OMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// OMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: ret void // // NOOMP-LABEL: @foo( diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -456,7 +456,7 @@ __OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, - VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) + VoidPtr, VoidPtr, VoidPtr) __OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) __OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) @@ -473,9 +473,8 @@ __OMP_RTL(__kmpc_alloc_shared, false, VoidPtr, SizeTy) __OMP_RTL(__kmpc_free_shared, false, Void, VoidPtr, SizeTy) -__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) -__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) -__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) +__OMP_RTL(__kmpc_alloc_aggregate_arg, false, VoidPtr, VoidPtr, VoidPtr) +__OMP_RTL(__kmpc_get_shared_variables_aggregate, false, Void, VoidPtrPtr) __OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32) __OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) @@ -931,9 +930,9 @@ ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(AllocatedPointer)), SizeTyExt)) -__OMP_RTL_ATTRS(__kmpc_begin_sharing_variables, AttributeSet(), AttributeSet(), - ParamAttrs(AttributeSet(), SizeTyExt)) +__OMP_RTL_ATTRS(__kmpc_alloc_aggregate_arg, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs(SExt, SizeTyExt)) __OMP_RTL_ATTRS(__kmpc_aligned_alloc, DefaultAttrs, ReturnPtrAttrs, @@ -1017,8 +1016,7 @@ ParamAttrs(AttributeSet(), SExt)) __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt, - AttributeSet(), AttributeSet(), AttributeSet(), - SizeTyExt)) + AttributeSet(), AttributeSet(), AttributeSet())) __OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt)) __OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs, diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4672,6 +4672,7 @@ case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_end_reduce_nowait: + case OMPRTL___kmpc_alloc_aggregate_arg: break; case OMPRTL___kmpc_distribute_static_init_4: case OMPRTL___kmpc_distribute_static_init_4u: diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -182,25 +182,22 @@ /// memory configured at launch. void *__kmpc_get_dynamic_shared(); -/// Allocate sufficient space for \p NumArgs sequential `void*` and store the -/// allocation address in \p GlobalArgs. -/// -/// Called by the main thread prior to a parallel region. 
-/// -/// We also remember it in GlobalArgsPtr to ensure the worker threads and -/// deallocation function know the allocation address too. -void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs); - -/// Deallocate the memory allocated by __kmpc_begin_sharing_variables. -/// -/// Called by the main thread after a parallel region. -void __kmpc_end_sharing_variables(); +/// Return a pointer in memory to store the aggregate argument of an outlined +/// parallel region, either using a local memory allocation when in SPMD mode +/// expected in \p LocalPtr, or from shared memory, expected in \p GlobalPtr. +void *__kmpc_alloc_aggregate_arg(void *LocalPtr, void *GlobalPtr); -/// Store the allocation address obtained via __kmpc_begin_sharing_variables in +/// Store the pointer of the struct aggregating shared variables in /// \p GlobalArgs. /// /// Called by the worker threads in the parallel region (function). -void __kmpc_get_shared_variables(void ***GlobalArgs); +void __kmpc_get_shared_variables_aggregate(void **GlobalArgs); + +/// Store the pointer of the struct aggregating shared variables in \p args +/// to team-shared memory for worker threads to access. +/// +/// Called by the main thread to initiate parallel region execution. +void __kmpc_set_shared_variables_aggregate(void *args); /// External interface to get the thread ID. uint32_t __kmpc_get_hardware_thread_id_in_block(); diff --git a/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen b/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen deleted file mode 100644 --- a/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen +++ /dev/null @@ -1,405 +0,0 @@ -case 0: -((void (*)(int32_t *, int32_t * -))fn)(&global_tid, &bound_tid -); -break; -case 1: -((void (*)(int32_t *, int32_t * -, void *))fn)(&global_tid, &bound_tid -, args[0]); -break; -case 2: -((void (*)(int32_t *, int32_t * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1]); -break; -case 3: -((void (*)(int32_t *, int32_t * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2]); -break; -case 4: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -); -break; -case 5: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4]); -break; -case 6: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5]); -break; -case 7: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6]); -break; -case 8: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -); -break; -case 9: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8]); -break; -case 10: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], 
args[5], args[6], args[7] -, args[8], args[9]); -break; -case 11: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10]); -break; -case 12: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -); -break; -case 13: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12]); -break; -case 14: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13]); -break; -case 15: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14]); -break; -case 16: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -); -break; -case 17: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16]); -break; -case 18: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17]); -break; -case 19: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18]); -break; -case 20: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], 
args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -); -break; -case 21: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20]); -break; -case 22: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21]); -break; -case 23: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22]); -break; -case 24: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -); -break; -case 25: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24]); -break; -case 26: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25]); -break; -case 27: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void 
*, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26]); -break; -case 28: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -); -break; -case 29: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28]); -break; -case 30: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29]); -break; -case 31: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29], args[30]); -break; -case 32: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29], args[30], 
args[31] -); -break; diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -67,16 +67,11 @@ return NumThreads; } -// Invoke an outlined parallel function unwrapping arguments (up to 32). +// Invoke an outlined parallel function. void invokeMicrotask(int32_t global_tid, int32_t bound_tid, void *fn, - void **args, int64_t nargs) { + void *args) { DebugEntryRAII Entry(__FILE__, __LINE__, ""); - switch (nargs) { -#include "generated_microtask_cases.gen" - default: - PRINT("Too many arguments in kmp_invoke_microtask, aborting execution.\n"); - __builtin_trap(); - } + ((void (*)(int32_t *, int32_t *, void *))fn)(&global_tid, &bound_tid, args); } } // namespace @@ -85,7 +80,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, int32_t num_threads, int proc_bind, void *fn, - void *wrapper_fn, void **args, int64_t nargs) { + void *wrapper_fn, void *args) { FunctionTracingRAII(); uint32_t TId = mapping::getThreadIdInBlock(); @@ -102,7 +97,8 @@ (config::mayUseNestedParallelism() && icv::Level))) { state::DateEnvironmentRAII DERAII(ident); ++icv::Level; - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); + state::exitDataEnvironment(); return; } @@ -136,7 +132,7 @@ icv::Level.assert_eq(1u, ident, /* ForceTeamState */ true); if (TId < NumThreads) - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); // Synchronize all threads at the end of a parallel region. synchronize::threadsAligned(); @@ -162,70 +158,11 @@ bool IsActiveParallelRegion = NumThreads > 1; if (!IsActiveParallelRegion) { state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident); - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); return; } - void **GlobalArgs = nullptr; - if (nargs) { - __kmpc_begin_sharing_variables(&GlobalArgs, nargs); - switch (nargs) { - default: - for (int I = 0; I < nargs; I++) - GlobalArgs[I] = args[I]; - break; - case 16: - GlobalArgs[15] = args[15]; - [[fallthrough]]; - case 15: - GlobalArgs[14] = args[14]; - [[fallthrough]]; - case 14: - GlobalArgs[13] = args[13]; - [[fallthrough]]; - case 13: - GlobalArgs[12] = args[12]; - [[fallthrough]]; - case 12: - GlobalArgs[11] = args[11]; - [[fallthrough]]; - case 11: - GlobalArgs[10] = args[10]; - [[fallthrough]]; - case 10: - GlobalArgs[9] = args[9]; - [[fallthrough]]; - case 9: - GlobalArgs[8] = args[8]; - [[fallthrough]]; - case 8: - GlobalArgs[7] = args[7]; - [[fallthrough]]; - case 7: - GlobalArgs[6] = args[6]; - [[fallthrough]]; - case 6: - GlobalArgs[5] = args[5]; - [[fallthrough]]; - case 5: - GlobalArgs[4] = args[4]; - [[fallthrough]]; - case 4: - GlobalArgs[3] = args[3]; - [[fallthrough]]; - case 3: - GlobalArgs[2] = args[2]; - [[fallthrough]]; - case 2: - GlobalArgs[1] = args[1]; - [[fallthrough]]; - case 1: - GlobalArgs[0] = args[0]; - [[fallthrough]]; - case 0: - break; - } - } + __kmpc_set_shared_variables_aggregate(args); { // Note that the order here is important. `icv::Level` has to be updated @@ -247,9 +184,6 @@ // Master waits for workers to signal. 
synchronize::threads(); } - - if (nargs) - __kmpc_end_sharing_variables(); } __attribute__((noinline)) bool diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -425,41 +425,21 @@ void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); } -/// Allocate storage in shared memory to communicate arguments from the main -/// thread to the workers in generic mode. If we exceed -/// NUM_SHARED_VARIABLES_IN_SHARED_MEM we will malloc space for communication. -constexpr uint64_t NUM_SHARED_VARIABLES_IN_SHARED_MEM = 64; - -[[clang::loader_uninitialized]] static void - *SharedMemVariableSharingSpace[NUM_SHARED_VARIABLES_IN_SHARED_MEM]; -#pragma omp allocate(SharedMemVariableSharingSpace) \ - allocator(omp_pteam_mem_alloc) -[[clang::loader_uninitialized]] static void **SharedMemVariableSharingSpacePtr; -#pragma omp allocate(SharedMemVariableSharingSpacePtr) \ - allocator(omp_pteam_mem_alloc) - -void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) { - FunctionTracingRAII(); - if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) { - SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0]; - } else { - SharedMemVariableSharingSpacePtr = (void **)memory::allocGlobal( - nArgs * sizeof(void *), "new extended args"); - ASSERT(SharedMemVariableSharingSpacePtr != nullptr && - "Nullptr returned by malloc!"); - } - *GlobalArgs = SharedMemVariableSharingSpacePtr; +void *__kmpc_alloc_aggregate_arg(void *LocalPtr, void *GlobalPtr) { + return (__kmpc_is_spmd_exec_mode() ? LocalPtr : GlobalPtr); } -void __kmpc_end_sharing_variables() { - FunctionTracingRAII(); - if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0]) - memory::freeGlobal(SharedMemVariableSharingSpacePtr, "new extended args"); +/// Allocate storage in shared memory to communicate the struct aggregating +/// arguments from the main thread to the workers in generic mode. 
+[[clang::loader_uninitialized]] static void *SharedMemAggregatePtr[1]; +#pragma omp allocate(SharedMemAggregatePtr) allocator(omp_pteam_mem_alloc) + +void __kmpc_get_shared_variables_aggregate(void **GlobalArgs) { + *GlobalArgs = SharedMemAggregatePtr[0]; } -void __kmpc_get_shared_variables(void ***GlobalArgs) { - FunctionTracingRAII(); - *GlobalArgs = SharedMemVariableSharingSpacePtr; +void __kmpc_set_shared_variables_aggregate(void *args) { + SharedMemAggregatePtr[0] = args; } } #pragma omp end declare target diff --git a/openmp/libomptarget/utils/generate_microtask_cases.py b/openmp/libomptarget/utils/generate_microtask_cases.py deleted file mode 100755 --- a/openmp/libomptarget/utils/generate_microtask_cases.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 - -import argparse - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--max_args', type=int, help='Max number of arguments to generate case statements for', required=True) - parser.add_argument('--output', help='Output header file to include', required=True) - args = parser.parse_args() - - output='' - for i in range(args.max_args+1): - output += 'case %d:\n'%(i) - output += '((void (*)(kmp_int32 *, kmp_int32 *\n' - for j in range(i): - output += ', void *' - if (j+1)%4 == 0: - output += '\n' - output += '))fn)(&global_tid, &bound_tid\n' - for j in range(i): - output += ', args[%d]'%(j) - if (j+1)%4 == 0: - output += '\n' - output += ');\n' - output += 'break;\n' - - with open(args.output, 'w') as f: - print(output, file=f) - -if __name__ == "__main__": - main()
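
Editor's note, not part of the patch: the hunks above replace the old "share N void* arguments" protocol with a single aggregate pointer, and the following minimal C++ sketch shows how the new pieces fit together. The signatures of __kmpc_parallel_51 and __kmpc_get_shared_variables_aggregate are copied from the Interface.h / Parallelism.cpp hunks above; everything else (the struct `anon`, its field names, and the helpers emit_parallel_region / worker_wrapper) is a hypothetical stand-in for what Clang's codegen and the generated wrapper emit, shown here only to illustrate the protocol.

// Sketch only, under the assumptions stated above.
#include <cstdint>

struct IdentTy; // opaque source-location descriptor used by the device runtime

extern "C" {
// New signature from this patch: one void* aggregate instead of void** + nargs.
void __kmpc_parallel_51(IdentTy *ident, int32_t gtid, int32_t if_expr,
                        int32_t num_threads, int proc_bind, void *fn,
                        void *wrapper_fn, void *args);
// Workers fetch the aggregate pointer published by the team main thread.
void __kmpc_get_shared_variables_aggregate(void **GlobalArgs);
}

// One anonymous struct per parallel region, one field per captured variable
// (this mirrors the %struct.anon / omp.outlined.arg.agg. allocas in the
// CHECK lines above; the field names are illustrative).
struct anon {
  int64_t vla_bound;
  int **p;
  int *a_addr;
};

// Roughly what the team main thread now does: build the aggregate once and
// hand a single pointer to the runtime.
static void emit_parallel_region(IdentTy *ident, int32_t gtid, void *outlined,
                                 void *wrapper, int64_t vla, int **p, int *a) {
  anon agg{vla, p, a};
  __kmpc_parallel_51(ident, gtid, /*if_expr=*/1, /*num_threads=*/-1,
                     /*proc_bind=*/-1, outlined, wrapper, &agg);
}

// Roughly what the generated worker wrapper does in generic mode: retrieve the
// published aggregate pointer and forward it unchanged to the outlined body,
// which unpacks the fields itself (the GEPs in the CHECK lines above).
static void worker_wrapper(int32_t gtid, int32_t btid,
                           void (*outlined)(int32_t *, int32_t *, void *)) {
  void *shared_agg = nullptr;
  __kmpc_get_shared_variables_aggregate(&shared_agg);
  outlined(&gtid, &btid, shared_agg);
}

On the device, codegen additionally calls __kmpc_alloc_aggregate_arg(LocalPtr, GlobalPtr) to choose between a private alloca (SPMD mode) and a globalized allocation (generic mode), as implemented in the State.cpp hunk; on the host, the same aggregate is simply passed as the single variadic argument to __kmpc_fork_call, as the updated FileCheck expectations show.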